From 278c0c03b375ffa144d68e0a07f442d079c34028 Mon Sep 17 00:00:00 2001
From: scott
Date: Thu, 16 Apr 2026 13:17:53 -0400
Subject: [PATCH] Wait for IOM to come back online after restart instead of fixed sleep

A fixed 30s sleep is not enough for a full BMC boot cycle. Add
_wait_for_iom_online() which polls GET /redfish/v1/ every 15s until the
IOM responds (up to 5 minutes), then call it after every IOM restart in
both _update_iom_fw and _update_fabric_fw.

This ensures the fabric card update (and post-update validation) don't
run while the IOM is still booting and unreachable.

Co-Authored-By: Claude Sonnet 4.6
---
 modules/redfish.py           | 20 ++++++++++++++++++++
 modules/workflow_firmware.py | 17 +++++++++++++----
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/modules/redfish.py b/modules/redfish.py
index c4e5fb6..1eada80 100644
--- a/modules/redfish.py
+++ b/modules/redfish.py
@@ -181,6 +181,26 @@ def _redfish_poll_tasks(password: str, host: str, timeout: int = 600) -> tuple:
     return False, f"Timeout after {timeout}s waiting for tasks."
 
 
+def _wait_for_iom_online(password: str, host: str, timeout: int = 300) -> bool:
+    """
+    Poll GET /redfish/v1/ until the IOM responds successfully, indicating it
+    has finished rebooting. Waits up to `timeout` seconds, probing every 15s.
+    Returns True if the IOM came back online, False if timeout was exceeded.
+    """
+    poll_interval = 15  # seconds between probes
+    start = time.monotonic()
+    deadline = start + timeout
+    while time.monotonic() < deadline:
+        ok_flag, _ = _redfish_request(password, "GET", "/redfish/v1/", host=host)
+        if ok_flag:
+            return True
+        elapsed = int(time.monotonic() - start)
+        info(f" Waiting for IOM to come back online... [{elapsed}s elapsed]")
+        # Never sleep past the deadline, so the total wait honours `timeout`.
+        time.sleep(max(0.0, min(poll_interval, deadline - time.monotonic())))
+    return False
+
+
 def _redfish_restart_iom(password: str, host: str, iom: str) -> tuple:
     return _redfish_request(
         password, "POST",
diff --git a/modules/workflow_firmware.py b/modules/workflow_firmware.py
index dc1ad6a..6218923 100644
--- a/modules/workflow_firmware.py
+++ b/modules/workflow_firmware.py
@@ -12,6 +12,7 @@ from redfish import (
     _redfish_poll_tasks,
     _redfish_restart_iom,
     _redfish_reset_fabric,
+    _wait_for_iom_online,
     _show_fw_versions,
 )
 from ui import (
@@ -161,8 +162,12 @@ def _update_iom_fw(password: str, ip: str, iom: str, fw_path: str) -> bool:
 
     info(f"Restarting {iom}...")
     _redfish_restart_iom(password, ip, iom)  # connection drop on restart is normal
-    ok(f"{iom} restart initiated. Waiting 30s for reboot...")
-    time.sleep(30)
+    ok(f"{iom} restart initiated. Waiting for IOM to come back online...")
+    time.sleep(30)  # allow time for the IOM to begin shutting down before polling
+    if _wait_for_iom_online(password, ip):
+        ok(f"{iom} is back online.")
+    else:
+        warn(f"{iom} did not respond within 5 minutes — proceeding anyway.")
     return True
 
 
@@ -204,8 +209,12 @@ def _update_fabric_fw(password: str, ip: str, iom: str, fw_path: str) -> bool:
 
     info(f"Restarting {iom} after Fabric Card update...")
     _redfish_restart_iom(password, ip, iom)
-    ok(f"{iom} restart initiated. Waiting 30s for reboot...")
-    time.sleep(30)
+    ok(f"{iom} restart initiated. Waiting for IOM to come back online...")
+    time.sleep(30)  # allow time for the IOM to begin shutting down before polling
+    if _wait_for_iom_online(password, ip):
+        ok(f"{iom} is back online.")
+    else:
+        warn(f"{iom} did not respond within 5 minutes — proceeding anyway.")
     return True
 
 