Infer categorized watcher group names from actual host execution
- Update the watcher to stop trusting misleading categorized child build labels when they do not match the host/spec actually being executed.
- Infer the reported categorized group name from the actual host being run, so that mismatched labels such as `ubuntu-batch` for a Red Hat host are corrected in status reporting.
- Document the categorized watcher workaround in the ATVM guide, the watcher design, and the watcher README, without changing the underlying ATVM runner scripts.
This commit is contained in:
@@ -47,6 +47,7 @@ Run ATVM CMC automation tests on the designated automation VM without unintended
|
|||||||
- it must post one final Mattermost status per completed categorized group/sub-run
|
- it must post one final Mattermost status per completed categorized group/sub-run
|
||||||
- it must stay active between grouped sub-runs while the parent categorized request is still running
|
- it must stay active between grouped sub-runs while the parent categorized request is still running
|
||||||
- it must not stop after the first grouped run simply because one grouped run completed
|
- it must not stop after the first grouped run simply because one grouped run completed
|
||||||
|
- if the child build id label does not match the actual host/spec being executed, report the grouped run using the inferred host-based group instead of the raw child build id label
|
||||||
- it must not wait and replace those with one single parent-only post
|
- it must not wait and replace those with one single parent-only post
|
||||||
- After execution, report immediate success/failure only.
|
- After execution, report immediate success/failure only.
|
||||||
- Do not actively monitor completion unless explicitly requested.
|
- Do not actively monitor completion unless explicitly requested.
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ A categorized run must be treated differently:
|
|||||||
- then continue watching for the next grouped sub-run
|
- then continue watching for the next grouped sub-run
|
||||||
- the watcher must remain alive while the parent categorized request or related child Cypress process is still active
|
- the watcher must remain alive while the parent categorized request or related child Cypress process is still active
|
||||||
- one completed grouped sub-run must not be treated as proof that the parent categorized request is finished
|
- one completed grouped sub-run must not be treated as proof that the parent categorized request is finished
|
||||||
|
- if the child build id label does not match the actual host/spec being executed, the watcher must infer the real group from host execution and use that inferred group for reporting
|
||||||
- the watcher must not wait until the very end to send one single parent-only post
|
- the watcher must not wait until the very end to send one single parent-only post
|
||||||
|
|
||||||
Evidence sources:
|
Evidence sources:
|
||||||
|
|||||||
@@ -128,5 +128,6 @@ This writes a cancellation marker, updates `state.json` to `CANCELLED`, and stop
|
|||||||
- Kernel values are resolved from `atvm/inventory/vm-inventory.md`.
|
- Kernel values are resolved from `atvm/inventory/vm-inventory.md`.
|
||||||
- Categorized execution is treated as sequential grouped ATVM sub-runs, not as one parent run with internal phases.
|
- Categorized execution is treated as sequential grouped ATVM sub-runs, not as one parent run with internal phases.
|
||||||
- In categorized mode, the watcher writes per-subrun state under `subruns/` and posts each completed grouped run separately.
|
- In categorized mode, the watcher writes per-subrun state under `subruns/` and posts each completed grouped run separately.
|
||||||
|
- In categorized mode, if the child build id label does not match the host/spec actually being executed, the watcher reports the grouped run using the inferred host-based group name instead of trusting the raw child build id label.
|
||||||
- Best-practice controller install path: `/opt/atvm-watcher-service`.
|
- Best-practice controller install path: `/opt/atvm-watcher-service`.
|
||||||
- This package is local-only right now. Nothing here is installed on the controller yet.
|
- This package is local-only right now. Nothing here is installed on the controller yet.
|
||||||
|
|||||||
@@ -553,6 +553,38 @@ def infer_group_label(hosts: List[str], index: int) -> str:
|
|||||||
return "-".join(labels) if labels else f"group{index}"
|
return "-".join(labels) if labels else f"group{index}"
|
||||||
|
|
||||||
|
|
||||||
|
def infer_group_from_host(host: str) -> str:
    """Derive a categorized group label from a single host name.

    The text up to and including the first hyphen is discarded (a host
    without a hyphen is used whole), the remainder is lower-cased, and it
    is then matched against a fixed list of known OS-family prefixes.

    Args:
        host: Host name as reported for the run.

    Returns:
        The label of the first matching prefix (``"w2k"`` maps to
        ``"windows"``; every other prefix maps to itself). If nothing
        matches, the lower-cased remainder is returned unchanged, or
        ``"group"`` when that remainder is empty.
    """
    # Equivalent to host.split("-", 1)[-1]: keep what follows the first
    # hyphen, or the whole string when there is no hyphen at all.
    head, hyphen, tail = host.partition("-")
    remainder = (tail if hyphen else head).lower()

    # (prefix to look for, label to report). No prefix is a prefix of
    # another, so the lookup order does not affect the result.
    prefix_labels = (
        ("w2k", "windows"),
        ("amazonlinux", "amazonlinux"),
        ("centos", "centos"),
        ("ubuntu", "ubuntu"),
        ("rocky", "rocky"),
        ("redhat", "redhat"),
        ("oracle", "oracle"),
        ("fedora", "fedora"),
        ("debian", "debian"),
        ("suse", "suse"),
    )
    for prefix, label in prefix_labels:
        if remainder.startswith(prefix):
            return label

    # Unrecognised host: fall back to the raw remainder, never an empty label.
    return remainder if remainder else "group"
|
||||||
|
|
||||||
|
|
||||||
|
def corrected_categorized_display_name(raw_display_name: str, hosts: List[str]) -> str:
    """Rewrite the group segment of a categorized build name from the host actually run.

    Every ``-<known-group>-batch`` segment in ``raw_display_name`` is replaced
    with ``-<inferred-group>-batch``, where the inferred group comes from
    ``infer_group_from_host`` applied to the first entry of ``hosts``.

    Args:
        raw_display_name: Build/display name as reported by the child run.
        hosts: Hosts observed for this sub-run; only ``hosts[0]`` is consulted.

    Returns:
        ``raw_display_name`` unchanged when ``hosts`` is empty or when the name
        contains no recognised ``-<group>-batch`` segment; otherwise the name
        with that segment rewritten to the inferred group.
    """
    if not hosts:
        return raw_display_name

    group = infer_group_from_host(hosts[0])
    replacement = f"-{group}-batch"

    # Use a callable replacement so the host-derived text is inserted
    # literally. Passing it as a replacement *string* makes `re.sub` process
    # backslash escapes and group references in it, which can raise `re.error`
    # or corrupt the name when an unrecognised host name contains a backslash.
    return re.sub(
        r"-(amazonlinux|centos|ubuntu|rocky|redhat|oracle|fedora|debian|suse|windows|w2k)-batch",
        lambda _match: replacement,
        raw_display_name,
    )
|
||||||
|
|
||||||
|
|
||||||
def extract_segment_build_name(segment_text: str, parent_build_name: str) -> Optional[str]:
|
def extract_segment_build_name(segment_text: str, parent_build_name: str) -> Optional[str]:
|
||||||
patterns = [
|
patterns = [
|
||||||
rf"({re.escape(parent_build_name)}-[A-Za-z0-9_.-]*batch\d+_\d+)",
|
rf"({re.escape(parent_build_name)}-[A-Za-z0-9_.-]*batch\d+_\d+)",
|
||||||
@@ -702,8 +734,8 @@ def discover_categorized_subruns(
|
|||||||
xml_mtime = datetime.fromtimestamp(xml_path.stat().st_mtime, tz=timezone.utc)
|
xml_mtime = datetime.fromtimestamp(xml_path.stat().st_mtime, tz=timezone.utc)
|
||||||
if xml_mtime < started_at:
|
if xml_mtime < started_at:
|
||||||
continue
|
continue
|
||||||
display_name = xml_path.stem[len("test-result-"):]
|
raw_display_name = xml_path.stem[len("test-result-"):]
|
||||||
discovered_builds.append(display_name)
|
discovered_builds.append(raw_display_name)
|
||||||
parsed = parse_host_xml(xml_path)
|
parsed = parse_host_xml(xml_path)
|
||||||
host_results: Dict[str, HostResult] = {}
|
host_results: Dict[str, HostResult] = {}
|
||||||
if parsed:
|
if parsed:
|
||||||
@@ -721,13 +753,16 @@ def discover_categorized_subruns(
|
|||||||
state = "RUNNING"
|
state = "RUNNING"
|
||||||
if cancelled:
|
if cancelled:
|
||||||
state = "CANCELLED"
|
state = "CANCELLED"
|
||||||
elif check_ts or display_name != current_subrun_build or not parent_active:
|
elif check_ts or raw_display_name != current_subrun_build or not parent_active:
|
||||||
state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
|
state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
|
||||||
|
display_name = corrected_categorized_display_name(raw_display_name, list(host_results))
|
||||||
notes = [f"Categorized sub-run discovered from reporter file `{xml_path.name}`."]
|
notes = [f"Categorized sub-run discovered from reporter file `{xml_path.name}`."]
|
||||||
if check_ts:
|
if check_ts:
|
||||||
notes.append("Final `check-xml-files.ts` validation passed.")
|
notes.append("Final `check-xml-files.ts` validation passed.")
|
||||||
if summary and host_results:
|
if summary and host_results:
|
||||||
notes.append("Host result details were derived from the parent categorized run log summary.")
|
notes.append("Host result details were derived from the parent categorized run log summary.")
|
||||||
|
if display_name != raw_display_name:
|
||||||
|
notes.append(f"Child build id was reported as `{raw_display_name}`, but the actual grouped run was inferred from host execution as `{display_name}`.")
|
||||||
if cancelled:
|
if cancelled:
|
||||||
notes.append("Cancellation marker detected.")
|
notes.append("Cancellation marker detected.")
|
||||||
end_ts = check_ts or next((result.timestamp for result in host_results.values() if result.timestamp), xml_mtime)
|
end_ts = check_ts or next((result.timestamp for result in host_results.values() if result.timestamp), xml_mtime)
|
||||||
@@ -766,16 +801,22 @@ def discover_categorized_subruns(
|
|||||||
status="RUN",
|
status="RUN",
|
||||||
detail="in progress",
|
detail="in progress",
|
||||||
)
|
)
|
||||||
|
display_name = corrected_categorized_display_name(current_subrun_build, [current_host] if current_host else [])
|
||||||
|
notes = ["Active categorized sub-run inferred from live `--ci-build-id` process state."]
|
||||||
|
if display_name != current_subrun_build:
|
||||||
|
notes.append(f"Child build id is `{current_subrun_build}`, but the actual grouped run was inferred from host execution as `{display_name}`.")
|
||||||
|
if cancelled:
|
||||||
|
notes.append("Cancellation marker detected.")
|
||||||
subrun_states.append(
|
subrun_states.append(
|
||||||
{
|
{
|
||||||
"key": sanitize_key(current_subrun_build),
|
"key": sanitize_key(current_subrun_build),
|
||||||
"display_name": current_subrun_build,
|
"display_name": display_name,
|
||||||
"state": "CANCELLED" if cancelled else "RUNNING",
|
"state": "CANCELLED" if cancelled else "RUNNING",
|
||||||
"host_results": host_results,
|
"host_results": host_results,
|
||||||
"start_ts": started_at,
|
"start_ts": started_at,
|
||||||
"end_ts": None,
|
"end_ts": None,
|
||||||
"currents_url": None,
|
"currents_url": None,
|
||||||
"notes": ["Active categorized sub-run inferred from live `--ci-build-id` process state."] + (["Cancellation marker detected."] if cancelled else []),
|
"notes": notes,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user