diff --git a/scripts/world_bank/wdi/worldbank.py b/scripts/world_bank/wdi/worldbank.py index 126e4e281e..dcb5d4757f 100644 --- a/scripts/world_bank/wdi/worldbank.py +++ b/scripts/world_bank/wdi/worldbank.py @@ -255,9 +255,11 @@ def read_worldbank(iso3166alpha3, mode): if file.startswith("API"): file_to_open = file break - assert file_to_open is not None, \ - "Failed to find data for" + iso3166alpha3 - + if file_to_open is None: + logging.warning( + 'Failed to find data for %s in the downloaded ZIP. Skipping.', + iso3166alpha3) + return None df = None # Captures any text contained in double quotatations. line_match = re.compile(r"\"([^\"]*)\"") @@ -415,15 +417,21 @@ def download_indicator_data(worldbank_countries, indicator_codes, mode): for index, country_code in enumerate(worldbank_countries['ISO3166Alpha3']): country_df = read_worldbank(country_code, mode) + if country_df is None: + continue + # Remove unneccessary indicators. country_df = country_df[country_df['IndicatorCode'].isin( indicators_to_keep)] - # Map country codes to ISO. country_df['ISO3166Alpha3'] = country_code - # Add new row to main datframe. country_df_list.append(country_df) + # 3. Handle the empty list case OUTSIDE the loop + if not country_df_list: + logging.error("No data was downloaded for any country.") + # Return empty DF with expected columns to satisfy the rest of the pipeline + return pd.DataFrame(columns=['StatisticalVariable', 'Year', 'Value']) worldbank_dataframe = pd.concat(country_df_list) # Map indicator codes to unique Statistical Variable.