Wait for IOM to come back online after restart instead of fixed sleep
A fixed 30s sleep is not enough for a full BMC boot cycle. Add _wait_for_iom_online() which polls GET /redfish/v1/ every 15s until the IOM responds (up to 5 minutes), then call it after every IOM restart in both _update_iom_fw and _update_fabric_fw. This ensures the fabric card update (and post-update validation) doesn't run while the IOM is still booting and unreachable. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -181,6 +181,25 @@ def _redfish_poll_tasks(password: str, host: str, timeout: int = 600) -> tuple:
|
|||||||
return False, f"Timeout after {timeout}s waiting for tasks."
|
return False, f"Timeout after {timeout}s waiting for tasks."
|
||||||
|
|
||||||
|
|
||||||
|
def _wait_for_iom_online(password: str, host: str, timeout: int = 300) -> bool:
|
||||||
|
"""
|
||||||
|
Poll GET /redfish/v1/ until the IOM responds successfully, indicating it
|
||||||
|
has finished rebooting. Waits up to `timeout` seconds.
|
||||||
|
Returns True if the IOM came back online, False if timeout was exceeded.
|
||||||
|
"""
|
||||||
|
deadline = time.monotonic() + timeout
|
||||||
|
attempt = 0
|
||||||
|
while time.monotonic() < deadline:
|
||||||
|
attempt += 1
|
||||||
|
ok_flag, _ = _redfish_request(password, "GET", "/redfish/v1/", host=host)
|
||||||
|
if ok_flag:
|
||||||
|
return True
|
||||||
|
elapsed = int(time.monotonic() - (deadline - timeout))
|
||||||
|
info(f" Waiting for IOM to come back online... [{elapsed}s elapsed]")
|
||||||
|
time.sleep(15)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def _redfish_restart_iom(password: str, host: str, iom: str) -> tuple:
|
def _redfish_restart_iom(password: str, host: str, iom: str) -> tuple:
|
||||||
return _redfish_request(
|
return _redfish_request(
|
||||||
password, "POST",
|
password, "POST",
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ from redfish import (
|
|||||||
_redfish_poll_tasks,
|
_redfish_poll_tasks,
|
||||||
_redfish_restart_iom,
|
_redfish_restart_iom,
|
||||||
_redfish_reset_fabric,
|
_redfish_reset_fabric,
|
||||||
|
_wait_for_iom_online,
|
||||||
_show_fw_versions,
|
_show_fw_versions,
|
||||||
)
|
)
|
||||||
from ui import (
|
from ui import (
|
||||||
@@ -161,8 +162,12 @@ def _update_iom_fw(password: str, ip: str, iom: str, fw_path: str) -> bool:
|
|||||||
|
|
||||||
info(f"Restarting {iom}...")
|
info(f"Restarting {iom}...")
|
||||||
_redfish_restart_iom(password, ip, iom) # connection drop on restart is normal
|
_redfish_restart_iom(password, ip, iom) # connection drop on restart is normal
|
||||||
ok(f"{iom} restart initiated. Waiting 30s for reboot...")
|
ok(f"{iom} restart initiated. Waiting for IOM to come back online...")
|
||||||
time.sleep(30)
|
time.sleep(30) # allow time for the IOM to begin shutting down before polling
|
||||||
|
if _wait_for_iom_online(password, ip):
|
||||||
|
ok(f"{iom} is back online.")
|
||||||
|
else:
|
||||||
|
warn(f"{iom} did not respond within 5 minutes — proceeding anyway.")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@@ -204,8 +209,12 @@ def _update_fabric_fw(password: str, ip: str, iom: str, fw_path: str) -> bool:
|
|||||||
|
|
||||||
info(f"Restarting {iom} after Fabric Card update...")
|
info(f"Restarting {iom} after Fabric Card update...")
|
||||||
_redfish_restart_iom(password, ip, iom)
|
_redfish_restart_iom(password, ip, iom)
|
||||||
ok(f"{iom} restart initiated. Waiting 30s for reboot...")
|
ok(f"{iom} restart initiated. Waiting for IOM to come back online...")
|
||||||
time.sleep(30)
|
time.sleep(30) # allow time for the IOM to begin shutting down before polling
|
||||||
|
if _wait_for_iom_online(password, ip):
|
||||||
|
ok(f"{iom} is back online.")
|
||||||
|
else:
|
||||||
|
warn(f"{iom} did not respond within 5 minutes — proceeding anyway.")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user