Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions tests/integration-tests/tests/common/assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,40 @@ def assert_no_msg_in_logs(remote_command_executor: RemoteCommandExecutor, log_fi
assert_that(log).does_not_contain(message)


def assert_no_defunct_slurm_config_params(remote_command_executor: RemoteCommandExecutor, ignore_patterns=None):
    """
    Assert slurmctld.log has no warnings about defunct or obsolete slurm.conf parameters.

    Slurm logs messages like "Ignoring obsolete <Param>=<Value> option" when it encounters
    configuration parameters that have been removed in the running Slurm version.
    This catches cases where our generated slurm.conf contains settings that are no longer
    recognized by the installed Slurm version.

    :param remote_command_executor: executor used to read /var/log/slurmctld.log on the remote node.
    :param ignore_patterns: optional substring, or iterable of substrings. A log line that matches a
        defunct/obsolete marker is not reported when it also contains any of these substrings.
        Empty substrings are discarded because they would match every line.
    :raises: fails the running test through pytest.fail, listing every offending log line.
    """
    __tracebackhide__ = True
    log_file = "/var/log/slurmctld.log"
    log_file_user = remote_command_executor.get_user_to_operate_on_file(log_file)
    log = remote_command_executor.run_remote_command(f"sudo -u {log_file_user} cat {log_file}", hide=True).stdout

    # Substrings that mark a slurmctld warning about a removed configuration parameter.
    defunct_patterns = (
        "Ignoring obsolete",
        "Ignoring defunct",
        "is defunct",
    )

    # Normalize the ignore list once, before scanning the log:
    # - a bare string would otherwise be iterated character by character, ignoring nearly every line;
    # - a one-shot iterator would otherwise be exhausted by the first offending line.
    if ignore_patterns is None:
        ignored = ()
    elif isinstance(ignore_patterns, str):
        ignored = (ignore_patterns,)
    else:
        ignored = tuple(ignore_patterns)
    # "" is a substring of every line and would silently disable the whole assertion.
    ignored = tuple(pattern for pattern in ignored if pattern)

    offending_lines = [
        line.strip()
        for line in log.splitlines()
        if any(pattern in line for pattern in defunct_patterns)
        and not any(pattern in line for pattern in ignored)
    ]

    if offending_lines:
        pytest.fail(
            "slurmctld.log contains warnings about defunct/obsolete slurm.conf parameters:\n"
            + "\n".join(offending_lines)
        )


def assert_msg_in_log(remote_command_executor: RemoteCommandExecutor, log_file: str, message: str):
"""Assert message is in log_file."""
__tracebackhide__ = True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from utils import to_snake_case

from tests.cloudwatch_logging import cloudwatch_logging_boto3_utils as cw_utils
from tests.common.assertions import assert_no_defunct_slurm_config_params
from tests.common.utils import get_aws_domain

STARTED_PATTERN = re.compile(r".*slurmdbd version [\d.]+ started")
Expand Down Expand Up @@ -245,6 +246,7 @@ def test_slurm_accounting(
_test_slurm_accounting_password(remote_command_executor)
_test_slurm_accounting_database_name(remote_command_executor, custom_database_name)
_test_that_slurmdbd_is_running(remote_command_executor)
assert_no_defunct_slurm_config_params(remote_command_executor)


@pytest.mark.usefixtures("os", "instance", "scheduler")
Expand Down Expand Up @@ -301,6 +303,7 @@ def _check_cluster_external_dbd(cluster, config_params, region, scheduler_comman
headnode_remote_command_executor,
)
_test_jobs_get_recorded(scheduler_commands)
assert_no_defunct_slurm_config_params(headnode_remote_command_executor)


def _check_inter_clusters_external_dbd(cluster_1, cluster_2, scheduler_commands_factory):
Expand Down
Loading