diff --git a/src/ispypsa/iasr_table_caching/local_cache.py b/src/ispypsa/iasr_table_caching/local_cache.py
index 71676542..c2a70318 100644
--- a/src/ispypsa/iasr_table_caching/local_cache.py
+++ b/src/ispypsa/iasr_table_caching/local_cache.py
@@ -20,6 +20,7 @@ def _build_required_tables() -> list[str]:
         _NETWORK_REQUIRED_TABLES = [
             "sub_regional_reference_nodes",
             "renewable_energy_zones",
+            "flow_path_transfer_capability",
         ]
         return _NETWORK_REQUIRED_TABLES
     else:
diff --git a/src/ispypsa/templater/create_template.py b/src/ispypsa/templater/create_template.py
index 08516df5..9b47baf8 100644
--- a/src/ispypsa/templater/create_template.py
+++ b/src/ispypsa/templater/create_template.py
@@ -32,6 +32,7 @@
     _template_new_generators_static_properties,
 )
 from ispypsa.templater.storage import _template_battery_properties
+from ispypsa.templater.transmission_paths import _template_network_transmission_paths
 
 _BASE_TEMPLATE_OUTPUTS = [
     "sub_regions",
@@ -137,6 +138,10 @@ def create_ispypsa_inputs_template(
             iasr_tables["sub_regional_reference_nodes"],
             iasr_tables["renewable_energy_zones"],
         )
+        template["network_transmission_paths"] = _template_network_transmission_paths(
+            iasr_tables["flow_path_transfer_capability"],
+            iasr_tables["renewable_energy_zones"],
+        )
         return template
 
     template = {}
diff --git a/src/ispypsa/templater/transmission_paths.py b/src/ispypsa/templater/transmission_paths.py
new file mode 100644
index 00000000..f5c91867
--- /dev/null
+++ b/src/ispypsa/templater/transmission_paths.py
@@ -0,0 +1,79 @@
+import pandas as pd
+
+_HVDC_PATH_IDS = {"NNSW-SQ_Terranora", "WNV-CSA_Murraylink", "TAS-SEV"}
+
+
+def _template_network_transmission_paths(
+    flow_path_transfer_capability: pd.DataFrame,
+    renewable_energy_zones: pd.DataFrame,
+) -> pd.DataFrame:
+    """Creates the network_transmission_paths topology table.
+
+    Args:
+        flow_path_transfer_capability: IASR flow path transfer capability table.
+        renewable_energy_zones: IASR renewable energy zones table.
+
+    Returns:
+        DataFrame with columns: path_id, geo_from, geo_to, carrier.
+    """
+    flow_path_rows = _extract_flow_path_rows(flow_path_transfer_capability)
+    rez_connection_rows = _extract_rez_connection_rows(renewable_energy_zones)
+    return pd.concat([flow_path_rows, rez_connection_rows], ignore_index=True)
+
+
+def _extract_flow_path_rows(
+    flow_path_transfer_capability: pd.DataFrame,
+) -> pd.DataFrame:
+    topology = _parse_flow_path_topology(flow_path_transfer_capability["Flow Paths"])
+    topology["carrier"] = topology["path_id"].map(
+        lambda pid: "DC" if pid in _HVDC_PATH_IDS else "AC"
+    )
+    return topology[["path_id", "geo_from", "geo_to", "carrier"]]
+
+
+def _parse_flow_path_topology(name_series: pd.Series) -> pd.DataFrame:
+    parsed = name_series.str.strip().str.extract(
+        # e.g. "NNSW-SQ (Terranora)" or "CNSW-SNW-NTH"
+        r"^(?P<geo_from>[A-Z]+)"  # uppercase code, e.g. "NNSW"
+        r"\s*[-\u2013\u2014\u00ad]+\s*"  # dash/en-dash/em-dash separator
+        r"(?P<geo_to>[A-Z]+)"  # uppercase code, e.g. "SQ"
+        r"\s*(?P<suffix>.*)"  # optional suffix, e.g. "(Terranora)" or "-NTH"
+    )
+    parsed["suffix"] = parsed["suffix"].apply(_clean_suffix)
+    parsed["path_id"] = parsed.apply(
+        lambda row: _build_path_id(row["geo_from"], row["geo_to"], row["suffix"]),
+        axis=1,
+    )
+    return parsed.drop(columns=["suffix"])
+
+
+def _clean_suffix(suffix: str) -> str:
+    # Handles two suffix patterns:
+    #   parenthesized: "(Terranora)" -> "Terranora"
+    #   dash-separated: "-NTH" -> "NTH"
+    suffix = suffix.strip()
+    if not suffix:
+        return ""
+    return suffix.strip("()").lstrip("-").strip()
+
+
+def _build_path_id(geo_from: str, geo_to: str, suffix: str) -> str:
+    path_id = f"{geo_from}-{geo_to}"
+    if suffix:
+        path_id = f"{path_id}_{suffix}"
+    return path_id
+
+
+def _extract_rez_connection_rows(
+    renewable_energy_zones: pd.DataFrame,
+) -> pd.DataFrame:
+    return pd.DataFrame(
+        {
+            "path_id": renewable_energy_zones["ID"]
+            + "-"
+            + renewable_energy_zones["ISP sub-region"],
+            "geo_from": renewable_energy_zones["ID"],
+            "geo_to": renewable_energy_zones["ISP sub-region"],
+            "carrier": "AC",
+        }
+    )
diff --git a/src/ispypsa/validation/schemas/network_transmission_paths.yaml b/src/ispypsa/validation/schemas/network_transmission_paths.yaml
index 894aa2c4..2ee6b9d5 100644
--- a/src/ispypsa/validation/schemas/network_transmission_paths.yaml
+++ b/src/ispypsa/validation/schemas/network_transmission_paths.yaml
@@ -11,7 +11,10 @@ columns:
   path_id:
     type: string
     required: true
-    description: Unique identifier for the path, formatted as geo_from-geo_to.
+    description: >
+      Unique identifier for the path. Base format is geo_from-geo_to (e.g.
+      CQ-NQ). Parallel paths append a suffix after an underscore (e.g.
+      NNSW-SQ_Terranora, CNSW-SNW_NTH).
   geo_from:
     type: string
     required: true
diff --git a/tests/test_iasr_table_caching/test_local_cache.py b/tests/test_iasr_table_caching/test_local_cache.py
index c31a6228..2d2b4307 100644
--- a/tests/test_iasr_table_caching/test_local_cache.py
+++ b/tests/test_iasr_table_caching/test_local_cache.py
@@ -9,7 +9,11 @@ def test_build_required_tables_new_format():
         {"use_new_table_format": True},
     ):
         result = _build_required_tables()
-    assert result == ["sub_regional_reference_nodes", "renewable_energy_zones"]
+    assert result == [
+        "sub_regional_reference_nodes",
+        "renewable_energy_zones",
+        "flow_path_transfer_capability",
+    ]
 
 
 def test_build_required_tables_old_format():
diff --git a/tests/test_templater/test_create_ispypsa_inputs_template.py b/tests/test_templater/test_create_ispypsa_inputs_template.py
index 3b4ac350..d6083d60 100644
--- a/tests/test_templater/test_create_ispypsa_inputs_template.py
+++ b/tests/test_templater/test_create_ispypsa_inputs_template.py
@@ -87,12 +87,9 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df):
         Q1, Far North QLD, QLD, NQ
         N3, Central-West Orana, NSW, CNSW
     """)
-    expected = csv_str_to_df("""
-        geo_id, geo_type, region_id, subregion_id
-        NQ, subregion, QLD, NQ
-        CNSW, subregion, NSW, CNSW
-        Q1, rez, QLD, NQ
-        N3, rez, NSW, CNSW
+    flow_path_transfer_capability = csv_str_to_df("""
+        Flow Paths
+        CQ-NQ
     """)
 
     with patch(
@@ -105,14 +102,34 @@ def test_create_ispypsa_inputs_template_new_format(csv_str_to_df):
             iasr_tables={
                 "sub_regional_reference_nodes": sub_regional_reference_nodes,
                 "renewable_energy_zones": renewable_energy_zones,
+                "flow_path_transfer_capability": flow_path_transfer_capability,
             },
             manually_extracted_tables={},
         )
 
-    assert list(result.keys()) == ["network_geography"]
+    expected_geography = csv_str_to_df("""
+        geo_id, geo_type, region_id, subregion_id
+        NQ, subregion, QLD, NQ
+        CNSW, subregion, NSW, CNSW
+        Q1, rez, QLD, NQ
+        N3, rez, NSW, CNSW
+    """)
     pd.testing.assert_frame_equal(
         result["network_geography"].reset_index(drop=True),
-        expected.reset_index(drop=True),
+        expected_geography.reset_index(drop=True),
+    )
+
+    expected_paths = csv_str_to_df("""
+        path_id, geo_from, geo_to, carrier
+        CQ-NQ, CQ, NQ, AC
+        Q1-NQ, Q1, NQ, AC
+        N3-CNSW, N3, CNSW, AC
+    """)
+    pd.testing.assert_frame_equal(
+        result["network_transmission_paths"]
+        .sort_values("path_id")
+        .reset_index(drop=True),
+        expected_paths.sort_values("path_id").reset_index(drop=True),
     )
 
 
diff --git a/tests/test_templater/test_transmission_paths.py b/tests/test_templater/test_transmission_paths.py
new file mode 100644
index 00000000..03040170
--- /dev/null
+++ b/tests/test_templater/test_transmission_paths.py
@@ -0,0 +1,105 @@
+import pandas as pd
+
+from ispypsa.templater.transmission_paths import _template_network_transmission_paths
+
+_FLOW_PATH_COLUMNS = ["Flow Paths"]
+_REZ_COLUMNS = ["ID", "Name", "NEM region", "ISP sub-region"]
+_OUTPUT_COLUMNS = ["path_id", "geo_from", "geo_to", "carrier"]
+
+
+def test_template_network_transmission_paths(csv_str_to_df):
+    flow_path_transfer_capability = csv_str_to_df("""
+        Flow Paths
+        CQ-NQ
+        NNSW-SQ
+        NNSW-SQ (Terranora)
+        TAS-SEV
+        WNV-CSA (Murraylink)
+        CNSW-SNW-NTH
+    """)
+
+    renewable_energy_zones = csv_str_to_df("""
+        ID, Name, NEM region, ISP sub-region
+        Q1, Far North QLD, QLD, NQ
+        N3, Central-West Orana, NSW, CNSW
+    """)
+
+    result = _template_network_transmission_paths(
+        flow_path_transfer_capability, renewable_energy_zones
+    )
+
+    expected = csv_str_to_df("""
+        path_id, geo_from, geo_to, carrier
+        CQ-NQ, CQ, NQ, AC
+        NNSW-SQ, NNSW, SQ, AC
+        NNSW-SQ_Terranora, NNSW, SQ, DC
+        TAS-SEV, TAS, SEV, DC
+        WNV-CSA_Murraylink, WNV, CSA, DC
+        CNSW-SNW_NTH, CNSW, SNW, AC
+        Q1-NQ, Q1, NQ, AC
+        N3-CNSW, N3, CNSW, AC
+    """)
+
+    pd.testing.assert_frame_equal(
+        result.sort_values("path_id").reset_index(drop=True),
+        expected.sort_values("path_id").reset_index(drop=True),
+    )
+
+
+def test_empty_flow_paths(csv_str_to_df):
+    flow_path_transfer_capability = pd.DataFrame(columns=_FLOW_PATH_COLUMNS)
+
+    renewable_energy_zones = csv_str_to_df("""
+        ID, Name, NEM region, ISP sub-region
+        Q1, Far North QLD, QLD, NQ
+    """)
+
+    result = _template_network_transmission_paths(
+        flow_path_transfer_capability, renewable_energy_zones
+    )
+
+    expected = csv_str_to_df("""
+        path_id, geo_from, geo_to, carrier
+        Q1-NQ, Q1, NQ, AC
+    """)
+
+    pd.testing.assert_frame_equal(
+        result.reset_index(drop=True),
+        expected.reset_index(drop=True),
+    )
+
+
+def test_empty_rez(csv_str_to_df):
+    flow_path_transfer_capability = csv_str_to_df("""
+        Flow Paths
+        CQ-NQ
+    """)
+
+    renewable_energy_zones = pd.DataFrame(columns=_REZ_COLUMNS)
+
+    result = _template_network_transmission_paths(
+        flow_path_transfer_capability, renewable_energy_zones
+    )
+
+    expected = csv_str_to_df("""
+        path_id, geo_from, geo_to, carrier
+        CQ-NQ, CQ, NQ, AC
+    """)
+
+    pd.testing.assert_frame_equal(
+        result.reset_index(drop=True),
+        expected.reset_index(drop=True),
+    )
+
+
+def test_both_empty():
+    flow_path_transfer_capability = pd.DataFrame(columns=_FLOW_PATH_COLUMNS)
+    renewable_energy_zones = pd.DataFrame(columns=_REZ_COLUMNS)
+
+    result = _template_network_transmission_paths(
+        flow_path_transfer_capability, renewable_energy_zones
+    )
+
+    expected = pd.DataFrame(columns=_OUTPUT_COLUMNS)
+
+    pd.testing.assert_frame_equal(result, expected)