From 4186f2d0eaae19f62175bccb10cc2a28d0d8461c Mon Sep 17 00:00:00 2001
From: "anthony.wen" <awen01@gmail.com>
Date: Fri, 27 Mar 2026 13:51:12 -0400
Subject: [PATCH] Persist Currents run URLs for ATVM watcher notes

---
 atvm/docs/automation/run-learnings.md    |  9 ++++
 atvm/docs/automation/status-template.md  |  1 +
 atvm/watcher-service/atvm_run_watcher.py | 62 +++++++++++++++++++-----
 3 files changed, 60 insertions(+), 12 deletions(-)
diff --git a/atvm/docs/automation/run-learnings.md b/atvm/docs/automation/run-learnings.md
index da073fc..a6b3afa 100644
--- a/atvm/docs/automation/run-learnings.md
+++ b/atvm/docs/automation/run-learnings.md
@@ -259,6 +259,15 @@ This file stores run-specific examples only when a run produced a new learning r
 - Action for future runs:
   - Render ATVM status output in that section order for both local output and Mattermost posts.
 
+## Run Learning: 2026-03-27 (Persist the Currents run URL outside the transient runner log)
+- Observed failure mode:
+  - The watcher can include the Currents run URL in `NOTES:`, but only if it can still read the URL from live runner output or a consolidated run log.
+  - In practice, `/tmp/<build-name>.log` is not guaranteed to exist, and the host reporter artifacts do not preserve the final Currents run URL.
+- Action for future runs:
+  - Persist the Currents `Recorded Run` URL as soon as `run-sorry-cypress.py` sees it.
+  - Store it under the watcher state directory for the parent build so it survives runner exit and missing log files.
+  - Prefer the persisted Currents URL store over transient log scraping when building the final `NOTES:` section.
+
 ## Run Learning: 2026-03-27 (Default ATVM approval should include the watcher)
 - Observed requirement:
   - The operator wants `approve` to mean run with watcher by default.
diff --git a/atvm/docs/automation/status-template.md b/atvm/docs/automation/status-template.md
index 5874815..4056371 100644
--- a/atvm/docs/automation/status-template.md
+++ b/atvm/docs/automation/status-template.md
@@ -71,6 +71,7 @@ Use this as the default ATVM automation run-status template for:
   - `⏭️ SKIP`
 - Keep `Detail` concise.
 - Put broader context under `NOTES:`, not in the host table.
+- When available, put the persistent Currents run URL in `NOTES:` so operators can open the exact recorded run directly.
 - `COVERAGE:` should describe what the run was intended to cover without listing target hosts.
 - `TEST FLOW:` should describe the template-specific numbered run flow once for the whole test, not per host.
 - The watcher resolves `TEST FLOW:` from the run template name.
diff --git a/atvm/watcher-service/atvm_run_watcher.py b/atvm/watcher-service/atvm_run_watcher.py
index 70b8257..5c2d2cc 100644
--- a/atvm/watcher-service/atvm_run_watcher.py
+++ b/atvm/watcher-service/atvm_run_watcher.py
@@ -215,6 +215,39 @@ def write_state(state_file: Path, state: Dict[str, object]) -> None:
     state_file.write_text(json.dumps(state, indent=2, sort_keys=True), encoding="utf-8")
 
 
+def load_currents_store(build_dir: Path) -> Dict[str, object]:
+    """Return the JSON object in ``build_dir/currents_urls.json``, or ``{}`` if unusable."""
+    store_path = build_dir / "currents_urls.json"
+    try:
+        loaded = json.loads(store_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # Best effort: a missing, unreadable, mis-encoded, or malformed store means "no URLs".
+        return {}
+    return loaded if isinstance(loaded, dict) else {}
+
+
+def latest_currents_url(build_dir: Path) -> Optional[str]:
+    """Return the store's ``latest_url`` when it is a non-empty string, else ``None``."""
+    candidate = load_currents_store(build_dir).get("latest_url")
+    return candidate if isinstance(candidate, str) and candidate else None
+
+
+def persisted_currents_url_for_build(build_dir: Path, build_id: Optional[str]) -> Optional[str]:
+    """Return the Currents URL stored for ``build_id``, else ``latest_url``, else ``None``."""
+    # Read the store once so the per-build lookup and the fallback share one snapshot.
+    store = load_currents_store(build_dir)
+    by_build_id = store.get("by_build_id")
+    if build_id and isinstance(by_build_id, dict):
+        entry = by_build_id.get(build_id)
+        # An entry is either a mapping carrying "url" or the bare URL string itself.
+        url = entry.get("url") if isinstance(entry, dict) else entry
+        if isinstance(url, str) and url:
+            return url
+    # Fall back to the store-wide ``latest_url`` entry.
+    latest_url = store.get("latest_url")
+    return latest_url if isinstance(latest_url, str) and latest_url else None
+
+
 def parse_xml_timestamp(raw: Optional[str]) -> Optional[datetime]:
     if not raw:
         return None
@@ -811,7 +844,7 @@ def split_log_segments(log_text: str, parent_build_name: str, categorized: bool,
                 started_at=default_started_at,
                 expected_hosts=extract_expected_hosts(log_text),
                 completed=False,
-                currents_url=extract_currents_url(log_text),
+                currents_url=None,
                 notes=[],
             )
         ]
@@ -828,7 +861,7 @@ def split_log_segments(log_text: str, parent_build_name: str, categorized: bool,
                 started_at=default_started_at,
                 expected_hosts=extract_expected_hosts(log_text),
                 completed=False,
-                currents_url=extract_currents_url(log_text),
+                currents_url=None,
                 notes=["Categorized mode was requested but no sub-run segment has appeared in the log yet."],
             )
         ]
@@ -848,7 +881,7 @@ def split_log_segments(log_text: str, parent_build_name: str, categorized: bool,
                 started_at=start_ts or default_started_at,
                 expected_hosts=expected_hosts,
                 completed=index < len(segment_starts),
-                currents_url=extract_currents_url(segment_text),
+                currents_url=None,
                 notes=[f"Categorized sub-run {index} of {len(segment_starts)}."],
             )
         )
@@ -859,11 +892,13 @@ def evaluate_subrun(
     subrun: SubRun,
     reporter_root: Path,
     inventory: Dict[str, str],
+    build_dir: Path,
     end_boundary: Optional[datetime],
     parent_active: bool,
     cancelled: bool,
 ) -> Tuple[str, Dict[str, HostResult], Optional[datetime], Optional[datetime], Optional[str], List[str]]:
     notes = list(subrun.notes)
+    currents_url = subrun.currents_url or persisted_currents_url_for_build(build_dir, subrun.display_name)
     host_results = collect_host_results(
         reporter_root=reporter_root,
         expected_hosts=subrun.expected_hosts,
@@ -881,17 +916,17 @@ def evaluate_subrun(
 
     if cancelled:
         notes.append("Cancellation marker detected.")
-        return "CANCELLED", host_results, start_ts, end_ts, subrun.currents_url, notes
+        return "CANCELLED", host_results, start_ts, end_ts, currents_url, notes
 
     if subrun.completed:
         if not host_results:
             notes.append("This categorized sub-run ended but no host results were detected.")
-            return "UNKNOWN", host_results, start_ts, end_ts, subrun.currents_url, notes
+            return "UNKNOWN", host_results, start_ts, end_ts, currents_url, notes
         notes.append("Categorized sub-run completed and the next grouped run was launched.")
         if check_end:
             notes.append("Final `check-xml-files.ts` validation passed.")
         state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
-        return state, host_results, start_ts, end_ts, subrun.currents_url, notes
+        return state, host_results, start_ts, end_ts, currents_url, notes
 
     if parent_active:
         current_host = next((host for host in subrun.expected_hosts if host not in host_results), None)
@@ -902,7 +937,7 @@ def evaluate_subrun(
                 status="RUN",
                 detail="in progress",
             )
-        return "RUNNING", host_results, start_ts, end_ts, subrun.currents_url, notes
+        return "RUNNING", host_results, start_ts, end_ts, currents_url, notes
 
     if check_end and not host_results:
         latest_host = collect_latest_host_reporter_artifact(
@@ -924,14 +959,15 @@ def evaluate_subrun(
             if latest_artifact_note not in notes and all(result.tests == 0 for result in host_results.values()):
                 notes.append(latest_artifact_note)
         state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED"
-        return state, host_results, start_ts, end_ts, subrun.currents_url, notes
+        return state, host_results, start_ts, end_ts, currents_url, notes
 
     notes.append("Run process exited before host results were detected.")
-    return "TERMINATED", host_results, start_ts, end_ts, subrun.currents_url, notes
+    return "TERMINATED", host_results, start_ts, end_ts, currents_url, notes
 
 
 def discover_categorized_subruns(
     build_name: str,
+    build_dir: Path,
     reporter_root: Path,
     inventory: Dict[str, str],
     log_text: str,
@@ -1030,7 +1066,7 @@ def discover_categorized_subruns(
                 host_results=host_results,
                 start_ts=start_ts,
                 end_ts=end_ts,
-                currents_url=summary["currents_url"] if summary else None,
+                currents_url=(summary["currents_url"] if summary else None) or persisted_currents_url_for_build(build_dir, raw_display_name),
                 notes=notes,
             )
 
@@ -1059,7 +1095,7 @@ def discover_categorized_subruns(
             host_results=host_results,
             start_ts=started_at,
             end_ts=None,
-            currents_url=None,
+            currents_url=persisted_currents_url_for_build(build_dir, current_subrun_build),
             notes=notes,
         )
 
@@ -1088,6 +1124,7 @@ def determine_state(
     if metadata.get("categorized"):
         subrun_states = discover_categorized_subruns(
             build_name=build_name,
+            build_dir=build_dir,
             reporter_root=reporter_root,
             inventory=inventory,
             log_text=log_text,
@@ -1106,6 +1143,7 @@ def determine_state(
                 subrun=subrun,
                 reporter_root=reporter_root,
                 inventory=inventory,
+                build_dir=build_dir,
                 end_boundary=next_started_at,
                 parent_active=active,
                 cancelled=cancelled,
@@ -1150,7 +1188,7 @@ def determine_state(
     parent_end_candidates = [subrun["end_ts"] for subrun in subrun_states if subrun["end_ts"]]
     start_ts = min(parent_start_candidates) if parent_start_candidates else started_at
     end_ts = max(parent_end_candidates) if parent_end_candidates else find_check_xml_end(reporter_root, started_at)
-    currents_url = extract_currents_url(log_text)
+    currents_url = extract_currents_url(log_text) or latest_currents_url(build_dir)
 
     if cancelled:
         notes.append("Cancellation marker detected.")