Wait for IOM to come back online after restart instead of fixed sleep

A fixed 30s sleep is not enough for a full BMC boot cycle. Add
_wait_for_iom_online(), which polls GET /redfish/v1/ every 15s until the
IOM responds (up to 5 minutes), and call it after every IOM restart in
both _update_iom_fw and _update_fabric_fw. The 30s sleep is kept only as
a grace period so the IOM can begin shutting down before polling starts.
This ensures the fabric card update (and post-update validation) doesn't
run while the IOM is still booting and unreachable.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-16 13:17:53 -04:00
parent 3dc9a5538e
commit 278c0c03b3
2 changed files with 32 additions and 4 deletions

View File

@@ -181,6 +181,25 @@ def _redfish_poll_tasks(password: str, host: str, timeout: int = 600) -> tuple:
return False, f"Timeout after {timeout}s waiting for tasks." return False, f"Timeout after {timeout}s waiting for tasks."
def _wait_for_iom_online(
    password: str,
    host: str,
    timeout: int = 300,
    poll_interval: float = 15,
) -> bool:
    """
    Poll GET /redfish/v1/ until the IOM responds successfully.

    The request is issued via `_redfish_request`; its first return value is
    treated as the success flag. The IOM is always polled at least once, and
    once more when the deadline is reached, so the final wait is never wasted.

    Args:
        password: Credential forwarded unchanged to `_redfish_request`.
        host: Target host forwarded to `_redfish_request` as `host=`.
        timeout: Maximum number of seconds to keep waiting.
        poll_interval: Seconds to wait between polls (default 15).

    Returns:
        True if a poll succeeded, False if `timeout` elapsed first.
    """
    start = time.monotonic()
    deadline = start + timeout
    while True:
        ok_flag, _ = _redfish_request(password, "GET", "/redfish/v1/", host=host)
        if ok_flag:
            return True

        now = time.monotonic()
        remaining = deadline - now
        if remaining <= 0:
            # Deadline reached and the latest poll still failed: give up.
            return False

        elapsed = int(now - start)
        info(f" Waiting for IOM to come back online... [{elapsed}s elapsed]")
        # Never sleep past the deadline; the next iteration performs the
        # final poll right at the timeout boundary.
        time.sleep(min(poll_interval, remaining))
def _redfish_restart_iom(password: str, host: str, iom: str) -> tuple: def _redfish_restart_iom(password: str, host: str, iom: str) -> tuple:
return _redfish_request( return _redfish_request(
password, "POST", password, "POST",

View File

@@ -12,6 +12,7 @@ from redfish import (
_redfish_poll_tasks, _redfish_poll_tasks,
_redfish_restart_iom, _redfish_restart_iom,
_redfish_reset_fabric, _redfish_reset_fabric,
_wait_for_iom_online,
_show_fw_versions, _show_fw_versions,
) )
from ui import ( from ui import (
@@ -161,8 +162,12 @@ def _update_iom_fw(password: str, ip: str, iom: str, fw_path: str) -> bool:
info(f"Restarting {iom}...") info(f"Restarting {iom}...")
_redfish_restart_iom(password, ip, iom) # connection drop on restart is normal _redfish_restart_iom(password, ip, iom) # connection drop on restart is normal
ok(f"{iom} restart initiated. Waiting 30s for reboot...") ok(f"{iom} restart initiated. Waiting for IOM to come back online...")
time.sleep(30) time.sleep(30) # allow time for the IOM to begin shutting down before polling
if _wait_for_iom_online(password, ip):
ok(f"{iom} is back online.")
else:
warn(f"{iom} did not respond within 5 minutes — proceeding anyway.")
return True return True
@@ -204,8 +209,12 @@ def _update_fabric_fw(password: str, ip: str, iom: str, fw_path: str) -> bool:
info(f"Restarting {iom} after Fabric Card update...") info(f"Restarting {iom} after Fabric Card update...")
_redfish_restart_iom(password, ip, iom) _redfish_restart_iom(password, ip, iom)
ok(f"{iom} restart initiated. Waiting 30s for reboot...") ok(f"{iom} restart initiated. Waiting for IOM to come back online...")
time.sleep(30) time.sleep(30) # allow time for the IOM to begin shutting down before polling
if _wait_for_iom_online(password, ip):
ok(f"{iom} is back online.")
else:
warn(f"{iom} did not respond within 5 minutes — proceeding anyway.")
return True return True