Keep categorized ATVM watcher alive until parent run finishes
- update the watcher to treat categorized parent-run activity as the authoritative signal for whether the overall request is still running
- prevent the watcher from exiting early just because one categorized grouped sub-run completed and wrote artifacts
- document that categorized watcher instances must remain alive between grouped runs until the parent request has actually gone inactive past the grace window
- update the ATVM guide, watcher design, and install docs to reflect the stricter categorized parent-run completion rule
This commit is contained in:
@@ -108,6 +108,16 @@ def process_active(build_name: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def related_process_active(build_name: str) -> bool:
    """Report whether any runner-related process line mentions *build_name*.

    Scans the text returned by ``run_ps()`` (presumably a process listing --
    confirm against its definition) line by line. A line counts as a match
    when it contains ``build_name`` as a substring and also contains at least
    one of the runner markers below.

    Returns:
        True if at least one line matches, otherwise False.
    """
    # Substrings that identify a process as belonging to the test runner.
    runner_markers = ("run-sorry-cypress.py", "cypress-cloud", "node ")
    return any(
        build_name in ps_line
        and any(marker in ps_line for marker in runner_markers)
        for ps_line in run_ps().splitlines()
    )
|
||||
|
||||
|
||||
def extract_active_subrun_build(build_name: str) -> Optional[str]:
|
||||
output = run_ps()
|
||||
matches: List[str] = []
|
||||
@@ -713,7 +723,7 @@ def determine_state(
|
||||
) -> Tuple[str, List[Dict[str, object]], Dict[str, HostResult], Optional[datetime], Optional[datetime], Optional[str], List[str]]:
|
||||
cancelled_marker = build_dir / "cancelled.marker"
|
||||
log_text = read_text(run_log)
|
||||
active = process_active(build_name)
|
||||
active = related_process_active(build_name) if metadata.get("categorized") else process_active(build_name)
|
||||
cancelled = cancelled_marker.exists()
|
||||
notes: List[str] = []
|
||||
subrun_states: List[Dict[str, object]] = []
|
||||
@@ -776,6 +786,10 @@ def determine_state(
|
||||
return "HUNG", subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
||||
return "RUNNING", subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
||||
|
||||
if metadata.get("categorized") and process_gone_since and (now_utc() - process_gone_since).total_seconds() < process_exit_grace_seconds:
|
||||
notes.append("Categorized parent runner has not been gone long enough to treat the request as finished.")
|
||||
return "RUNNING", subrun_states, parent_host_results, start_ts, end_ts, currents_url, notes
|
||||
|
||||
terminal_subruns = [subrun for subrun in subrun_states if subrun["state"] in {"COMPLETED", "FAILED"}]
|
||||
if terminal_subruns:
|
||||
state = "FAILED" if any(result.failures for result in parent_host_results.values()) else "COMPLETED"
|
||||
|
||||
Reference in New Issue
Block a user