From 70542a13a06e356f9a0e6c3a0cdd4c3df3668e16 Mon Sep 17 00:00:00 2001 From: "anthony.wen" Date: Wed, 15 Apr 2026 21:11:38 -0400 Subject: [PATCH] Fix ATVM watcher truncation after Cloud Run Finished Adjust parent-run summary parsing so the watcher does not stop at the "Recorded Run" detection log line and miss later host rows in Mattermost status output. Document the 2026-04-15 failure mode in ATVM run learnings so future watcher/reporting work preserves the broader Cloud Run Finished block parsing rule. --- atvm/docs/automation/run-learnings.md | 9 ++++++++ atvm/watcher-service/atvm_run_watcher.py | 28 +++++++++++++----------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/atvm/docs/automation/run-learnings.md b/atvm/docs/automation/run-learnings.md index 055be47..37de4e4 100644 --- a/atvm/docs/automation/run-learnings.md +++ b/atvm/docs/automation/run-learnings.md @@ -544,3 +544,12 @@ This file stores run-specific examples only when a run produced a new learning r - Match plugin-gated generated-spec branches generically by plugin-bearing gate variable name instead of hardcoding only one template's variable names. - Apply the same plugin-branch filtering logic across ATVM templates so new templates do not need one-off watcher fixes. - Validate generated-spec `TEST FLOW` against the selected runtime plugin path for reboot and other templates before assuming the generic fix is complete. + +## Run Learning: 2026-04-15 (Parent `Cloud Run Finished` parsing must tolerate late host rows after Recorded Run detection) +- Observed failure mode: + - A non-categorized watcher run tested three VMs, but the Mattermost status only showed two hosts. + - In the launch log, the parent `Cloud Run Finished` summary printed one host row, then logged `Detected 'Recorded Run' after 'Cloud Run Finished' - results uploaded successfully.`, then printed the remaining host rows. + - The watcher treated that detection log line as the end of the summary block, so the merged parent-run summary dropped the later host row. +- Action for future runs: + - Do not stop parent summary parsing at the Recorded Run detection log line. + - Bound each `Cloud Run Finished` block by the next run boundary such as the next `Extracted specPattern:` or the next `Cloud Run Finished`, then parse all host rows inside that block. diff --git a/atvm/watcher-service/atvm_run_watcher.py b/atvm/watcher-service/atvm_run_watcher.py index 88c517a..bd2344b 100644 --- a/atvm/watcher-service/atvm_run_watcher.py +++ b/atvm/watcher-service/atvm_run_watcher.py @@ -700,24 +700,26 @@ def extract_host_results_from_run_finished_segment(segment_text: str, inventory: def extract_completed_subrun_summaries(log_text: str, inventory: Dict[str, str]) -> List[Dict[str, object]]: summaries: List[Dict[str, object]] = [] - cloud_blocks = list( - re.finditer( - r"Cloud Run Finished(.*?)(?:🏁 Recorded Run:\s*(https://\S+)|\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} - INFO - Detected 'Recorded Run' after 'Cloud Run Finished' - results uploaded successfully\.)", - log_text, - re.S, - ) - ) + cloud_starts = [match.start() for match in re.finditer(r"Cloud Run Finished", log_text)] previous_block_end = 0 - for block in cloud_blocks: - block_text = block.group(1) - currents_url = block.group(2) - prior_segment = log_text[previous_block_end:block.start()] + for index, block_start in enumerate(cloud_starts): + next_cloud_start = cloud_starts[index + 1] if index + 1 < len(cloud_starts) else len(log_text) + next_spec_match = re.search(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} - INFO - Extracted specPattern:", log_text[block_start + 1 :], re.M) + block_end = next_cloud_start + if next_spec_match: + candidate_end = block_start + 1 + next_spec_match.start() + if candidate_end < block_end: + block_end = candidate_end + block_text = log_text[block_start:block_end] + currents_match = re.search(r"🏁 Recorded Run:\s*(https://\S+)", block_text) + currents_url = currents_match.group(1) if currents_match else None + prior_segment = log_text[previous_block_end:block_start] detail_source = prior_segment + "\n" + block_text host_results = extract_host_results_from_run_finished_segment(block_text, inventory) if not host_results: host_results = extract_host_results_from_run_finished_segment(prior_segment, inventory) if not host_results: - previous_block_end = block.end() + previous_block_end = block_end continue for host, result in host_results.items(): if result.failures: @@ -728,7 +730,7 @@ def extract_completed_subrun_summaries(log_text: str, inventory: Dict[str, str]) "currents_url": currents_url, } ) - previous_block_end = block.end() + previous_block_end = block_end return summaries