diff --git a/atvm/docs/automation/guide.md b/atvm/docs/automation/guide.md index b0862ca..afb80e5 100644 --- a/atvm/docs/automation/guide.md +++ b/atvm/docs/automation/guide.md @@ -242,12 +242,15 @@ Status-report expectations: - Do not include generic watcher bookkeeping messages in `NOTES:` such as artifact-detection confirmations. - Do not include internal watcher fallback notes in `NOTES:` such as `check-xml-files.ts` validation confirmations or reporter-artifact recovery details. - The `HOSTS:` table includes `Host`, `Kernel`, `Status`, and `Detail` columns in that order. -- In `COVERAGE:`, describe the important `cmc-templates.py` command inputs such as template, categorize mode, datastore/config family, config filename, migration style, plugin/integration path, and other operator-relevant run options, but do not list target hosts there or include verbose prose scope descriptions. +- In `COVERAGE:`, describe the important `cmc-templates.py` command inputs such as template, categorize mode, datastore/config family, config filename, migration style, any real plugin/integration path, and other operator-relevant run options, but do not list target hosts there or include verbose prose scope descriptions. +- Only include coverage fields that the template command actually used. Do not show empty or irrelevant fields such as an integration/plugin path for templates that did not use one. - If `categorize mode: enabled` is already shown in `COVERAGE:`, do not also repeat `--categorize` under `run options`. - When grouped categorized timing is reconstructed from host reporter artifacts, derive per-host quickest/longest/average durations from the sequence of recovered host timestamps and the grouped end time instead of leaving those metrics as `n/a`. - In `TEST FLOW:`, show the template-specific numbered run flow once for the whole test, not per host. - Resolve the flow from the run template name. 
- `cmc-e2e` currently uses the 22-step migration flow documented in `/home/aw/code/cds/atvm/docs/automation/status-template.md`. +- `cmc-systemOS` currently uses the 21-step boot-disk migration flow documented in `/home/aw/code/cds/atvm/docs/automation/status-template.md`. +- Keep `NOTES:` behavior consistent across template types; do not add template-specific internal-source notes such as parent-log-summary recovery details. - For the `Kernel` column, cross-reference the host name against `/home/aw/code/cds/atvm/inventory/vm-inventory.md`. - If the hostname is not present in `vm-inventory.md`, report the kernel value as `unknown`. - Treat references to the "ATVM automation run" or "automation run" as referring to this ATVM folder workflow and the automation VM at `192.168.3.190`, not to Cirrus project operations such as the `atvm - cypress` project. diff --git a/atvm/docs/automation/run-learnings.md b/atvm/docs/automation/run-learnings.md index feef801..076b540 100644 --- a/atvm/docs/automation/run-learnings.md +++ b/atvm/docs/automation/run-learnings.md @@ -225,6 +225,18 @@ This file stores run-specific examples only when a run produced a new learning r - `21. Clean up iSCSI targets` - `22. Power off` +## Run Learning: 2026-03-27 (Template-specific coverage fields and systemOS flow) +- Observed requirement: + - `COVERAGE:` should only show fields that were actually present in the `cmc-templates.py` command for that template. + - Showing an empty integration/plugin path on a template that does not use one adds noise and misleads the reader. + - `cmc-systemOS` needs its own full numbered `TEST FLOW:` list rather than falling back to the generic short placeholder flow. + - `NOTES:` should stay consistent across templates and should not include internal parent-summary recovery notes for `cmc-systemOS`. +- Action for future runs: + - Render `COVERAGE:` from the actual template command inputs used for that run. 
+ - Omit integration/plugin coverage lines when the template command did not use them. + - Use the 21-step `cmc-systemOS` flow from `status-template.md`. + - Keep `NOTES:` template-consistent and operator-facing, without parent-log-summary recovery notes. + ## Run Learning: 2026-03-27 (Start watcher before runner when watcher is requested) - Observed failure mode: - Starting `run-sorry-cypress.py` before the watcher can race with the watcher helper's stale-log cleanup. diff --git a/atvm/docs/automation/status-template.md b/atvm/docs/automation/status-template.md index da017bd..cbca28c 100644 --- a/atvm/docs/automation/status-template.md +++ b/atvm/docs/automation/status-template.md @@ -45,7 +45,7 @@ Use this as the default ATVM automation run-status template for: - datastore/config family: `` - config file: `` - migration style: `` -- integration/plugin path: `` +- integration/plugin path: `` when the template command actually uses one - run options: `` **TEST FLOW:** @@ -79,7 +79,8 @@ Use this as the default ATVM automation run-status template for: - Do not include generic watcher bookkeeping lines in `NOTES:` such as "run artifacts were detected" or "final reporting artifacts were detected." - Do not include internal fallback notes in `NOTES:` such as "`check-xml-files.ts` validation passed" or "host details were derived from reporter artifacts." - `COVERAGE:` should describe what the run was intended to cover without listing target hosts. -- `COVERAGE:` should mostly mirror the important `cmc-templates.py` command inputs such as template, categorize mode, config filename, integration/plugin path, and important flags like `--ignore_force_shutdown`. +- `COVERAGE:` should mostly mirror the important `cmc-templates.py` command inputs such as template, categorize mode, config filename, any real integration/plugin path, and important flags like `--ignore_force_shutdown`. +- Do not render template-command fields in `COVERAGE:` when that template did not use them. 
- If `categorize mode: enabled` is shown, do not also repeat `--categorize` under `run options`. - When grouped categorized timing is reconstructed from host reporter artifacts, still populate `quickest`, `longest`, and `average` from inferred per-host durations when possible. - `TEST FLOW:` should describe the template-specific numbered run flow once for the whole test, not per host. @@ -107,6 +108,28 @@ Use this as the default ATVM automation run-status template for: - `20. Uninstall CMC` - `21. Clean up iSCSI targets` - `22. Power off` +- `cmc-systemOS` currently uses this flow: + - `1. Verifying set up` + - `2. Power on and obtain ip address and host name` + - `3. Uninstall CMC if still exists` + - `4. Attach destination disk on the host` + - `5. Copy CMC install command from GUI` + - `6. Install CMC on the host` + - `7. Create migration session (Simple Migration)` + - `8. Tracking Changes (Simple Migration)` + - `9. Finalize cutover (Simple Migration)` + - `10. Create migration report (Simple Migration)` + - `11. Delete migration session (Simple Migration)` + - `12. Power off the host` + - `13. Detach original source OS disk` + - `14. Reassign destination OS disk` + - `15. Power on to verify destination disk` + - `16. Power off the host` + - `17. Detach destination OS disk` + - `18. Attach original source OS disk back` + - `19. Power on and obtain ip address and host name` + - `20. Uninstall CMC on the host` + - `21. Power off the host` - See `/home/aw/code/cds/atvm/docs/automation/examples.md` for `cmc-e2e` examples. - Resolve kernel values by cross-referencing hostnames against `/home/aw/code/cds/atvm/inventory/vm-inventory.md`. - If no kernel value can be verified from `vm-inventory.md`, use `unknown`. 
diff --git a/atvm/watcher-service/atvm_run_watcher.py b/atvm/watcher-service/atvm_run_watcher.py index c194c68..ee6eb5d 100644 --- a/atvm/watcher-service/atvm_run_watcher.py +++ b/atvm/watcher-service/atvm_run_watcher.py @@ -64,6 +64,29 @@ TEMPLATE_TEST_FLOWS = { "21. Clean up iSCSI targets", "22. Power off", ], + "cmc-systemOS": [ + "1. Verifying set up", + "2. Power on and obtain ip address and host name", + "3. Uninstall CMC if still exists", + "4. Attach destination disk on the host", + "5. Copy CMC install command from GUI", + "6. Install CMC on the host", + "7. Create migration session (Simple Migration)", + "8. Tracking Changes (Simple Migration)", + "9. Finalize cutover (Simple Migration)", + "10. Create migration report (Simple Migration)", + "11. Delete migration session (Simple Migration)", + "12. Power off the host", + "13. Detach original source OS disk", + "14. Reassign destination OS disk", + "15. Power on to verify destination disk", + "16. Power off the host", + "17. Detach destination OS disk", + "18. Attach original source OS disk back", + "19. Power on and obtain ip address and host name", + "20. Uninstall CMC on the host", + "21. 
def coverage_lines(metadata: Dict[str, object]) -> List[str]:
    """Build the bullet lines rendered under the ``**COVERAGE:**`` heading.

    Only fields the template command actually used are emitted: the
    integration/plugin line is omitted when no real path is available.

    Args:
        metadata: Run metadata. ``template``, ``config_family`` and
            ``migration_style`` are read by subscript and must be present.
            ``categorized``, ``config_file``, ``integration_plugin`` and
            ``extra_options`` are optional.

    Returns:
        Markdown bullet strings, in display order, ending with the
        ``run options`` line.

    Raises:
        KeyError: If one of the required metadata keys is missing.
    """
    lines = [
        f"- template: `{metadata['template']}`",
        f"- categorize mode: `{'enabled' if metadata.get('categorized') else 'disabled'}`",
        f"- datastore/config family: `{metadata['config_family']}`",
        f"- config file: `{metadata.get('config_file', 'unknown')}`",
        f"- migration style: {metadata['migration_style']}",
    ]

    # Treat blank, whitespace-only, and any-case "unknown" values as "no
    # integration path", so templates that never used one do not get an
    # empty or placeholder coverage line.
    integration_plugin = metadata.get("integration_plugin")
    if isinstance(integration_plugin, str):
        integration_plugin = integration_plugin.strip()
        if integration_plugin and integration_plugin.casefold() != "unknown":
            lines.append(f"- integration/plugin path: `{integration_plugin}`")

    # Look the options up once; accept tuples as well as lists, and fall
    # back to an empty list for anything else (including a missing key).
    extra_options = metadata.get("extra_options")
    coverage_options = list(extra_options) if isinstance(extra_options, (list, tuple)) else []

    # "categorize mode: enabled" already conveys this flag, so do not repeat
    # it under run options.
    if metadata.get("categorized"):
        coverage_options = [value for value in coverage_options if value != "--categorize"]

    rendered_options = ", ".join(f"`{value}`" for value in coverage_options)
    lines.append(f"- run options: {rendered_options or 'none'}")
    return lines
"**COVERAGE:**", - f"- template: `{metadata['template']}`", - f"- categorize mode: `{'enabled' if metadata.get('categorized') else 'disabled'}`", - f"- datastore/config family: `{metadata['config_family']}`", - f"- config file: `{metadata.get('config_file', 'unknown')}`", - f"- migration style: {metadata['migration_style']}", - f"- integration/plugin path: `{metadata['integration_plugin']}`", - f"- run options: {', '.join(f'`{value}`' for value in coverage_options) or 'none'}", + *coverage_block, "", "**TEST FLOW:**", *test_flow_lines, @@ -1197,9 +1230,7 @@ def discover_categorized_subruns( elif check_ts or raw_display_name != current_subrun_build or not parent_active: state = "FAILED" if any(result.failures for result in host_results.values()) else "COMPLETED" notes = [f"Categorized sub-run discovered from reporter file `{xml_path.name}`."] - if summary and host_results: - notes.append("Host result details were derived from the parent categorized run log summary.") - elif check_ts and not host_results and parent_active: + if check_ts and not host_results and parent_active: notes.append("Grouped reporter XML arrived before the parent run log exposed the final host summary; waiting to post until host details are available.") if display_name != raw_display_name: notes.append(f"Child build id was reported as `{raw_display_name}`, but the actual grouped run was inferred from host execution as `{display_name}`.")