def assert_no_defunct_slurm_config_params(remote_command_executor: RemoteCommandExecutor, ignore_patterns=None):
    """
    Assert slurmctld.log has no warnings about defunct or obsolete slurm.conf parameters.

    /var/log/slurmctld.log is read through the given executor and every line containing one of
    the markers "Ignoring obsolete", "Ignoring defunct" or "is defunct" is treated as a warning
    about a slurm.conf setting that the installed Slurm version no longer recognizes.
    The test is failed via pytest.fail, listing the offending lines, if any such line remains
    after the ignore filter has been applied.

    :param remote_command_executor: executor used to read the log file on the remote node.
    :param ignore_patterns: optional substring, or iterable of substrings. A flagged line that
        contains any of them is tolerated. A bare string is treated as a single pattern, and
        empty patterns are discarded.
    """
    __tracebackhide__ = True
    log_file = "/var/log/slurmctld.log"
    log_file_user = remote_command_executor.get_user_to_operate_on_file(log_file)
    log = remote_command_executor.run_remote_command(f"sudo -u {log_file_user} cat {log_file}", hide=True).stdout

    defunct_patterns = (
        "Ignoring obsolete",
        "Ignoring defunct",
        "is defunct",
    )

    # A bare string would otherwise be iterated character by character, which makes nearly
    # every line look "ignored" and lets the assertion pass vacuously.
    if isinstance(ignore_patterns, str):
        ignore_patterns = (ignore_patterns,)
    # An empty pattern is a substring of every line, so it is dropped for the same reason.
    ignored = tuple(pattern for pattern in (ignore_patterns or ()) if pattern)

    offending_lines = [
        line.strip()
        for line in log.splitlines()
        if any(pattern in line for pattern in defunct_patterns)
        and not any(pattern in line for pattern in ignored)
    ]

    if offending_lines:
        pytest.fail(
            "slurmctld.log contains warnings about defunct/obsolete slurm.conf parameters:\n"
            + "\n".join(offending_lines)
        )
100644 --- a/tests/integration-tests/tests/schedulers/test_slurm_accounting.py +++ b/tests/integration-tests/tests/schedulers/test_slurm_accounting.py @@ -11,6 +11,7 @@ from utils import to_snake_case from tests.cloudwatch_logging import cloudwatch_logging_boto3_utils as cw_utils +from tests.common.assertions import assert_no_defunct_slurm_config_params from tests.common.utils import get_aws_domain STARTED_PATTERN = re.compile(r".*slurmdbd version [\d.]+ started") @@ -245,6 +246,7 @@ def test_slurm_accounting( _test_slurm_accounting_password(remote_command_executor) _test_slurm_accounting_database_name(remote_command_executor, custom_database_name) _test_that_slurmdbd_is_running(remote_command_executor) + assert_no_defunct_slurm_config_params(remote_command_executor) @pytest.mark.usefixtures("os", "instance", "scheduler") @@ -301,6 +303,7 @@ def _check_cluster_external_dbd(cluster, config_params, region, scheduler_comman headnode_remote_command_executor, ) _test_jobs_get_recorded(scheduler_commands) + assert_no_defunct_slurm_config_params(headnode_remote_command_executor) def _check_inter_clusters_external_dbd(cluster_1, cluster_2, scheduler_commands_factory):