Persist Currents run URLs for ATVM watcher notes

2026-03-27 13:51:12 -04:00
parent 3b28f877ef
commit 4186f2d0ea
3 changed files with 60 additions and 12 deletions
--- a/atvm/docs/automation/run-learnings.md
+++ b/atvm/docs/automation/run-learnings.md
@@ -259,6 +259,15 @@ This file stores run-specific examples only when a run produced a new learning r
 - Action for future runs:
  - Render ATVM status output in that section order for both local output and Mattermost posts.

+## Run Learning: 2026-03-27 (Persist the Currents run URL outside the transient runner log)
+- Observed failure mode:
+  - The watcher can include the Currents run URL in `NOTES:`, but only if it can still read the URL from live runner output or a consolidated run log.
+  - In practice, `/tmp/<build-name>.log` is not guaranteed to exist, and the host reporter artifacts do not preserve the final Currents run URL.
+- Action for future runs:
+  - Persist the Currents `Recorded Run` URL as soon as `run-sorry-cypress.py` sees it.
+  - Store it under the watcher state directory for the parent build so it survives runner exit and missing log files.
+  - Prefer the persisted Currents URL store over transient log scraping when building the final `NOTES:` section.
+
 ## Run Learning: 2026-03-27 (Default ATVM approval should include the watcher)
 - Observed requirement:
  - The operator wants `approve` to mean run with watcher by default.
--- a/atvm/docs/automation/status-template.md
+++ b/atvm/docs/automation/status-template.md
@@ -71,6 +71,7 @@ Use this as the default ATVM automation run-status template for:
  - `⏭️ SKIP`
 - Keep `Detail` concise.
 - Put broader context under `NOTES:`, not in the host table.
+- When available, put the persistent Currents run URL in `NOTES:` so operators can open the exact recorded run directly.
 - `COVERAGE:` should describe what the run was intended to cover without listing target hosts.
 - `TEST FLOW:` should describe the template-specific numbered run flow once for the whole test, not per host.
 - The watcher resolves `TEST FLOW:` from the run template name.
--- a/atvm/watcher-service/atvm_run_watcher.py
+++ b/atvm/watcher-service/atvm_run_watcher.py
@@ -215,6 +215,39 @@ def write_state(state_file: Path, state: Dict[str, object]) -> None:
    state_file.write_text(json.dumps(state, indent=2, sort_keys=True), encoding="utf-8")


+def load_currents_store(build_dir: Path) -> Dict[str, object]:
+    """Return the persisted Currents URL store for a build, or {} if unusable."""
+    store_path = build_dir / "currents_urls.json"
+    try:
+        loaded = json.loads(store_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # Missing/unreadable file, bad encoding, or malformed JSON: best-effort store.
+        return {}
+    return loaded if isinstance(loaded, dict) else {}
+
+
+def latest_currents_url(build_dir: Path) -> Optional[str]:
+    """Return the store's `latest_url` when it is a non-empty string, else None."""
+    candidate = load_currents_store(build_dir).get("latest_url")
+    return candidate if candidate and isinstance(candidate, str) else None
+
+
+def persisted_currents_url_for_build(build_dir: Path, build_id: Optional[str]) -> Optional[str]:
+    """Return the persisted URL for `build_id`, else the store's `latest_url`, else None."""
+    # Read the store once so the per-build entry and the fallback come from one snapshot.
+    store = load_currents_store(build_dir)
+    by_build_id = store.get("by_build_id")
+    if build_id and isinstance(by_build_id, dict):
+        entry = by_build_id.get(build_id)
+        # An entry is either a bare URL string or a mapping with a "url" key.
+        url = entry.get("url") if isinstance(entry, dict) else entry
+        if isinstance(url, str) and url:
+            return url
+    # No usable per-build entry: fall back to the store's `latest_url`.
+    latest_url = store.get("latest_url")
+    return latest_url if isinstance(latest_url, str) and latest_url else None
+
+
 def parse_xml_timestamp(raw: Optional[str]) -> Optional[datetime]:
    if not raw:
        return None
@@ -811,7 +844,7 @@ def split_log_segments(log_text: str, parent_build_name: str, categorized: bool,
                started_at=default_started_at,
                expected_hosts=extract_expected_hosts(log_text),
                completed=False,
-                currents_url=extract_currents_url(log_text),
+                currents_url=None,
                notes=[],
            )
        ]
@@ -828,7 +861,7 @@ def split_log_segments(log_text: str, parent_build_name: str, categorized: bool,
                started_at=default_started_at,
                expected_hosts=extract_expected_hosts(log_text),
                completed=False,
-                currents_url=extract_currents_url(log_text),
+                currents_url=None,
                notes=["Categorized mode was requested but no sub-run segment has appeared in the log yet."],
            )
        ]
@@ -848,7 +881,7 @@ def split_log_segments(log_text: str, parent_build_name: str, categorized: bool,
                started_at=start_ts or default_started_at,
                expected_hosts=expected_hosts,
                completed=index < len(segment_starts),
-                currents_url=extract_currents_url(segment_text),
+                currents_url=None,
                notes=[f"Categorized sub-run {index} of {len(segment_starts)}."],
            )
        )
@@ -859,11 +892,13 @@ def evaluate_subrun(
    subrun: SubRun,
    reporter_root: Path,
    inventory: Dict[str, str],
+    build_dir: Path,
    end_boundary: Optional[datetime],
    parent_active: bool,
    cancelled: bool,
 ) -> Tuple[str, Dict[str, HostResult], Optional[datetime], Optional[datetime], Optional[str], List[str]]:
    notes = list(subrun.notes)
+    currents_url = subrun.currents_url or persisted_currents_url_for_build(build_dir, subrun.display_name)
    host_results = collect_host_results(
        reporter_root=reporter_root,
        expected_hosts=subrun.expected_hosts,
@@ -881,17 +916,17 @@ def evaluate_subrun(

    if cancelled:
        notes.append("Cancellation marker detected.")
-        return "CANCELLED", host_results, start_ts, end_ts, subrun.currents_url, notes
+        return "CANCELLED", host_results, start_ts, end_ts, currents_url, notes

    if subrun.completed:
        if not host_results:
            notes.append("This categorized sub-run ended but no host results were detected.")
-            return "UNKNOWN", host_results, start_ts, end_ts, subrun.currents_url, notes
+            return "UNKNOWN", host_results, start_ts, end_ts, currents_url, notes
        notes.append("Categorized sub-run completed and the next grouped run was launched.")
        if check_end:
            notes.append("Final `check-xml-files.ts` validation passed.")
        state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
-        return state, host_results, start_ts, end_ts, subrun.currents_url, notes
+        return state, host_results, start_ts, end_ts, currents_url, notes

    if parent_active:
        current_host = next((host for host in subrun.expected_hosts if host not in host_results), None)
@@ -902,7 +937,7 @@ def evaluate_subrun(
                status="RUN",
                detail="in progress",
            )
-        return "RUNNING", host_results, start_ts, end_ts, subrun.currents_url, notes
+        return "RUNNING", host_results, start_ts, end_ts, currents_url, notes

    if check_end and not host_results:
        latest_host = collect_latest_host_reporter_artifact(
@@ -924,14 +959,15 @@ def evaluate_subrun(
            if latest_artifact_note not in notes and all(result.tests == 0 for result in host_results.values()):
                notes.append(latest_artifact_note)
        state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
-        return state, host_results, start_ts, end_ts, subrun.currents_url, notes
+        return state, host_results, start_ts, end_ts, currents_url, notes

    notes.append("Run process exited before host results were detected.")
-    return "TERMINATED", host_results, start_ts, end_ts, subrun.currents_url, notes
+    return "TERMINATED", host_results, start_ts, end_ts, currents_url, notes


 def discover_categorized_subruns(
    build_name: str,
+    build_dir: Path,
    reporter_root: Path,
    inventory: Dict[str, str],
    log_text: str,
@@ -1030,7 +1066,7 @@ def discover_categorized_subruns(
                host_results=host_results,
                start_ts=start_ts,
                end_ts=end_ts,
-                currents_url=summary["currents_url"] if summary else None,
+                currents_url=(summary["currents_url"] if summary else None) or persisted_currents_url_for_build(build_dir, raw_display_name),
                notes=notes,
            )

@@ -1059,7 +1095,7 @@ def discover_categorized_subruns(
            host_results=host_results,
            start_ts=started_at,
            end_ts=None,
-            currents_url=None,
+            currents_url=persisted_currents_url_for_build(build_dir, current_subrun_build),
            notes=notes,
        )

@@ -1088,6 +1124,7 @@ def determine_state(
    if metadata.get("categorized"):
        subrun_states = discover_categorized_subruns(
            build_name=build_name,
+            build_dir=build_dir,
            reporter_root=reporter_root,
            inventory=inventory,
            log_text=log_text,
@@ -1106,6 +1143,7 @@ def determine_state(
                subrun=subrun,
                reporter_root=reporter_root,
                inventory=inventory,
+                build_dir=build_dir,
                end_boundary=next_started_at,
                parent_active=active,
                cancelled=cancelled,
@@ -1150,7 +1188,7 @@ def determine_state(
    parent_end_candidates = [subrun["end_ts"] for subrun in subrun_states if subrun["end_ts"]]
    start_ts = min(parent_start_candidates) if parent_start_candidates else started_at
    end_ts = max(parent_end_candidates) if parent_end_candidates else find_check_xml_end(reporter_root, started_at)
-    currents_url = extract_currents_url(log_text)
+    currents_url = extract_currents_url(log_text) or latest_currents_url(build_dir)

    if cancelled:
        notes.append("Cancellation marker detected.")