diff --git a/.gitignore b/.gitignore index ab880fc..6b15804 100644 --- a/.gitignore +++ b/.gitignore @@ -166,3 +166,6 @@ wcrp_compliance_reports/ docs/fremorizer.* docs/modules.rst docs/build/ +fremorizer/tests/test_files/mock_pp_writer/ +fremorizer/tests/test_files/mock_writer_output.yaml +fremorizer/tests/test_files/mock_writer_varlists/ diff --git a/fremorizer/cmor_helpers.py b/fremorizer/cmor_helpers.py index ea3ab8b..8d6ff43 100644 --- a/fremorizer/cmor_helpers.py +++ b/fremorizer/cmor_helpers.py @@ -800,6 +800,12 @@ def filter_brands( brands: list, :return: The single brand string that survived disambiguation. :rtype: str """ + # guard: brands list must be non-empty + if not brands: + raise ValueError( + f'filter_brands called with an empty brands list for ' + f'{target_var!r} \u2014 there is nothing to disambiguate') + # map input vertical dim to MIP equivalent expected_mip_vert = None if input_vert_dim != 0: @@ -836,13 +842,25 @@ def filter_brands( brands: list, if len(filtered_brands) == 0: fre_logger.error('cmip7 brand disambiguation eliminated all candidates ' - 'from %s', brands) + 'from %s for target_var %s ' + '(has_time_bnds=%s, input_vert_dim=%s)', + brands, target_var, has_time_bnds, input_vert_dim) raise ValueError( - f'multiple brands {brands} found for {target_var}, ' - f'but none survived disambiguation filtering') + f'all candidate brands {brands} for {target_var!r} were eliminated ' + f'during disambiguation (has_time_bnds={has_time_bnds}, ' + f'input_vert_dim={input_vert_dim!r}). none survived filtering. ' + f'verify that the input file has the expected time bounds and ' + f'vertical coordinate, and that the MIP table entries for ' + f'{[f"{target_var}_{b}" for b in brands]} are correct') fre_logger.error('cmip7 brand disambiguation could not resolve between ' - '%s', filtered_brands) + '%s for target_var %s ' + '(has_time_bnds=%s, input_vert_dim=%s)', + filtered_brands, target_var, has_time_bnds, input_vert_dim) raise ValueError( - f'multiple brands {filtered_brands} remain for {target_var} after ' - f'disambiguation \u2014 cannot determine which brand to use') + f'multiple brands {filtered_brands} remain for {target_var!r} after ' + f'disambiguation (has_time_bnds={has_time_bnds}, ' + f'input_vert_dim={input_vert_dim!r}) \u2014 cannot determine which ' + f'brand to use. verify the input file dimensions and check whether ' + f'the MIP table has duplicate entries that share the same time-type ' + f'and vertical coordinate') diff --git a/fremorizer/cmor_mixer.py b/fremorizer/cmor_mixer.py index 1bfb07b..fa345d2 100644 --- a/fremorizer/cmor_mixer.py +++ b/fremorizer/cmor_mixer.py @@ -126,16 +126,21 @@ def rewrite_netcdf_file_var( mip_var_cfgs: dict = None, var_brand = None exp_cfg_mip_era = get_json_file_data(json_exp_config)['mip_era'].upper() if exp_cfg_mip_era == 'CMIP7': + brand_prefix = target_var + '_' brands = [] for mip_var in mip_var_cfgs['variable_entry'].keys(): - if all([ target_var == mip_var.split('_')[0], - var_dim == len(mip_var_cfgs['variable_entry'][mip_var]['dimensions']) ]): - brands.append(mip_var.split('_')[1]) - - if len(brands)>0: - if len(brands)==1: - var_brand=brands[0] - fre_logger.debug('cmip7 case, extracted brand %s',var_brand) + if not mip_var.startswith(brand_prefix): + continue + brand = mip_var[len(brand_prefix):] + if not brand: + continue + if var_dim == len(mip_var_cfgs['variable_entry'][mip_var]['dimensions']): + brands.append(brand) + + if len(brands) > 0: + if len(brands) == 1: + var_brand = brands[0] + fre_logger.debug('cmip7 case, extracted brand %s', var_brand) else: fre_logger.warning('cmip7 case, extracted multiple brands %s, attempting disambiguation', brands) @@ -146,9 +151,19 @@ def rewrite_netcdf_file_var( mip_var_cfgs: dict = None, ) else: - fre_logger.error('cmip7 case detected, but dimensions of input data do not match ' - 'any of those found for the associated brands.') - raise ValueError('no variable brand was able to be identified for this CMIP7 case') + fre_logger.error( + 'CMIP7 branded-variable error for %s: no brands in the MIP ' + 'table matched with %d dimension(s). available branded entries ' + 'for this variable: %s', + target_var, var_dim, + [k for k in mip_var_cfgs["variable_entry"] if k.startswith(brand_prefix)] + ) + raise ValueError( + f'no brands in the MIP table match {target_var!r} with ' + f'{var_dim} dimension(s). check that the MIP table contains a ' + f'branded entry (e.g. {target_var}_) whose dimension ' + f'count matches the input data' + ) fre_logger.debug('cmip7 case, filtered possible brands to %s', var_brand) else: fre_logger.debug('non-cmip7 case detected, skipping variable brands') diff --git a/fremorizer/tests/test_cmor_helpers.py b/fremorizer/tests/test_cmor_helpers.py index 2dd231c..dd5328c 100644 --- a/fremorizer/tests/test_cmor_helpers.py +++ b/fremorizer/tests/test_cmor_helpers.py @@ -411,7 +411,7 @@ def test_filter_brands_all_eliminated(): 'sos_a': 'longitude latitude time1', 'sos_b': 'longitude latitude time1', }) - with pytest.raises(ValueError, match='none survived'): + with pytest.raises(ValueError, match='none survived filtering'): filter_brands( brands=['a', 'b'], target_var='sos', @@ -427,7 +427,7 @@ def test_filter_brands_multiple_remain(): 'sos_a': 'longitude latitude time', 'sos_b': 'longitude latitude time', }) - with pytest.raises(ValueError, match='remain for sos'): + with pytest.raises(ValueError, match="remain for 'sos'"): filter_brands( brands=['a', 'b'], target_var='sos', @@ -435,3 +435,47 @@ def test_filter_brands_multiple_remain(): has_time_bnds=True, input_vert_dim=0, ) + + +def test_filter_brands_all_eliminated_message_contains_properties(): + ''' the error message should include the input properties that led to elimination ''' + mip = _make_mip_var_cfgs({ + "sos_a": "longitude latitude time1", + }) + with pytest.raises(ValueError, match=r'has_time_bnds=True.*input_vert_dim=0'): + filter_brands( + brands=["a"], + target_var="sos", + mip_var_cfgs=mip, + has_time_bnds=True, + input_vert_dim=0, + ) + + +def test_filter_brands_multiple_remain_message_contains_properties(): + ''' the error message for multiple remaining brands should include input properties ''' + mip = _make_mip_var_cfgs({ + "sos_a": "longitude latitude time", + "sos_b": "longitude latitude time", + }) + with pytest.raises(ValueError, match=r'has_time_bnds=True.*input_vert_dim=0'): + filter_brands( + brands=["a", "b"], + target_var="sos", + mip_var_cfgs=mip, + has_time_bnds=True, + input_vert_dim=0, + ) + + +def test_filter_brands_empty_brands_list(): + ''' should raise ValueError when brands list is empty ''' + mip = _make_mip_var_cfgs({}) + with pytest.raises(ValueError, match='empty brands list'): + filter_brands( + brands=[], + target_var="sos", + mip_var_cfgs=mip, + has_time_bnds=True, + input_vert_dim=0, + )