Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions create_trace_mapping.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""Create trace mapping."""

from pathlib import Path

import yaml
from nemosis import static_table

Expand All @@ -20,14 +24,14 @@
# Build the draft generator -> solar trace file mapping and dump it to YAML.
solar_generator_mapping = draft_solar_generator_to_trace_mapping(
    solar_gens, solar_traces
)
# Path.open is an instance method: call it on a Path instance rather than
# passing a bare str as `self` (the `Path.open("...", "w")` form only works
# by accident of the current implementation).
with Path("draft_solar_generator_mapping.yaml").open("w") as file:
    yaml.dump(solar_generator_mapping, file, default_flow_style=False)


# Build the draft REZ -> solar area trace file mapping and dump it to YAML.
solar_traces = "/media/nick/Samsung_T5/isp_2024_data/trace_data/solar/solar_2023"
rezs = gets_rezs(workbook)
solar_rez_mapping = draft_solar_rez_mapping(rezs, solar_traces)
with Path("solar_area_mapping.yaml").open("w") as file:
    yaml.dump(solar_rez_mapping, file, default_flow_style=False)

duids_and_station_names = static_table(
Expand All @@ -48,12 +52,12 @@
# Build the draft generator -> wind trace file mapping and dump it to YAML.
wind_generator_mapping = draft_wind_generator_to_trace_mapping(
    wind_gens, wind_duids_and_station_names, wind_traces
)
# Use a Path instance; calling Path.open unbound with a str `self` is fragile.
with Path("draft_wind_generator_mapping.yaml").open("w") as file:
    yaml.dump(wind_generator_mapping, file, default_flow_style=False, sort_keys=False)


# Build the draft REZ -> wind area trace file mapping and dump it to YAML.
wind_traces = "D:/isp_2024_data/trace_data/wind/wind_2023"
rezs = gets_rezs(workbook)
wind_rez_mapping = draft_wind_rez_mapping(rezs, wind_traces)
with Path("draft_wind_rez_mapping.yaml").open("w") as file:
    yaml.dump(wind_rez_mapping, file, default_flow_style=False)
35 changes: 13 additions & 22 deletions generator_to_trace_draft_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,23 @@ def get_all_generators(workbook_filepath):
additional_gens["Status"] = "additional"

existing_gens = existing_gens.rename(
columns={existing_gens.columns.values[0]: "Generator"}
columns={existing_gens.columns.to_numpy[0]: "Generator"}
Copy link
Copy Markdown
Member

@dylanjmcconnell dylanjmcconnell Apr 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, `.to_numpy` is a method, so this should be `.to_numpy()[0]` (this is a known unsafe auto-fix).

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops!

)
committed_gens = committed_gens.rename(
columns={committed_gens.columns.values[0]: "Generator"}
columns={committed_gens.columns.to_numpy[0]: "Generator"}
)
anticipated_gens = anticipated_gens.rename(
columns={anticipated_gens.columns.values[0]: "Generator"}
columns={anticipated_gens.columns.to_numpy[0]: "Generator"}
)
additional_gens = additional_gens.rename(
columns={additional_gens.columns.values[0]: "Generator"}
columns={additional_gens.columns.to_numpy[0]: "Generator"}
)

all_gens = pd.concat(
[existing_gens, committed_gens, anticipated_gens, additional_gens]
)

all_gens = all_gens.loc[:, ["Generator", "Technology type"]]

return all_gens
return all_gens.loc[:, ["Generator", "Technology type"]]


def gets_rezs(workbook_filepath):
Expand All @@ -53,15 +51,12 @@ def gets_rezs(workbook_filepath):
)
workbook = Parser(workbook_filepath)
rezs = workbook.get_table_from_config(table_config)
rezs = rezs.loc[:, ["Name"]]
return rezs
return rezs.loc[:, ["Name"]]


def find_best_match(plant_name, csv_files):
    """Return the csv file name that best fuzzy-matches plant_name.

    Uses token-set-ratio scoring; returns None when extractOne finds no
    candidate (e.g. csv_files is empty).
    """
    best_match = process.extractOne(plant_name, csv_files, scorer=fuzz.token_set_ratio)
    return best_match[0] if best_match else None


def find_best_match_two_columns(row, csv_files):
Expand Down Expand Up @@ -91,17 +86,15 @@ def draft_solar_generator_to_trace_mapping(solar_generators, solar_trace_directo
solar_generators["CSVFile"] = solar_generators["Generator"].apply(
lambda x: find_best_match(x, csv_project_names)
)
solar_generators = solar_generators.set_index("Generator")["CSVFile"].to_dict()
return solar_generators
return solar_generators.set_index("Generator")["CSVFile"].to_dict()


def draft_solar_rez_mapping(rezs, rezs_trace_directory):
    """Map each REZ name to the best-matching solar 'area' trace csv file.

    Returns a {rez_name: csv_file_name} dict; values may be None when no
    candidate file matched.
    """
    csv_file_names = [f for f in os.listdir(rezs_trace_directory) if f.endswith(".csv")]
    csv_file_metadata = [extract_solar_trace_metadata(f) for f in csv_file_names]
    # Only 'area' trace files correspond to REZs.
    csv_rez_names = [f["name"] for f in csv_file_metadata if f["file_type"] == "area"]
    rezs["CSVFile"] = rezs["Name"].apply(lambda x: find_best_match(x, csv_rez_names))
    return rezs.set_index("Name")["CSVFile"].to_dict()


def draft_wind_generator_to_trace_mapping(
Expand All @@ -118,8 +111,8 @@ def draft_wind_generator_to_trace_mapping(
# Fuzzy-match generator names to AEMO station names, then attach DUIDs
# via a left merge on the matched station name.
wind_generators["Station Name"] = wind_generators["Generator"].apply(
    lambda x: find_best_match(x, wind_station_names)
)
wind_generators = wind_generators.merge(
    wind_duids_and_station_names, how="left", on="Station Name"
)
# A station name can map to several DUID rows; keep one row per generator.
wind_generators = wind_generators.drop_duplicates(["Generator"])

Expand All @@ -131,14 +124,12 @@ def draft_wind_generator_to_trace_mapping(
:, ["Generator", "Station Name", "DUID", "CSVFile"]
]

wind_generators = wind_generators.set_index("Generator").to_dict(orient="index")
return wind_generators
return wind_generators.set_index("Generator").to_dict(orient="index")


def draft_wind_rez_mapping(rezs, rezs_trace_directory):
    """Map each REZ name to the best-matching wind 'area' trace csv file.

    Returns a {rez_name: csv_file_name} dict; values may be None when no
    candidate file matched.
    """
    csv_file_names = [f for f in os.listdir(rezs_trace_directory) if f.endswith(".csv")]
    csv_file_metadata = [extract_wind_trace_metadata(f) for f in csv_file_names]
    # Only 'area' trace files correspond to REZs.
    csv_rez_names = [f["name"] for f in csv_file_metadata if f["file_type"] == "area"]
    rezs["CSVFile"] = rezs["Name"].apply(lambda x: find_best_match(x, csv_rez_names))
    return rezs.set_index("Name")["CSVFile"].to_dict()
2 changes: 2 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Nox configuration file."""

import nox

nox.options.default_venv_backend = "uv"
Expand Down
2 changes: 1 addition & 1 deletion src/isp_trace_parser/construct_reference_year_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ def construct_reference_year_mapping(
reference_years = (
reference_years * full_reference_year_cycles
) + reference_years[:partial_cycle_length]
return dict(zip(years, reference_years))
return dict(zip(years, reference_years, strict=True))
26 changes: 13 additions & 13 deletions src/isp_trace_parser/demand_traces.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import functools
import os
from pathlib import Path
from typing import Literal, Optional
from typing import Literal

import polars as pl
import yaml
Expand Down Expand Up @@ -47,15 +47,16 @@ class DemandMetadataFilter(BaseModel):
reference_year: list of ints specifying reference_years
"""

subregion: Optional[list[str]] = None
scenario: Optional[
subregion: list[str] | None = None
scenario: (
list[Literal["Step Change", "Progressive Change", "Green Energy Exports"]]
] = None
poe: Optional[list[Literal["POE50", "POE10"]]] = None
demand_type: Optional[
list[Literal["OPSO_MODELLING", "OPSO_MODELLING_PVLITE", "PV_TOT"]]
] = None
reference_year: Optional[list[int]] = None
| None
) = None
poe: list[Literal["POE50", "POE10"]] | None = None
demand_type: (
list[Literal["OPSO_MODELLING", "OPSO_MODELLING_PVLITE", "PV_TOT"]] | None
) = None
reference_year: list[int] | None = None


@validate_call
Expand Down Expand Up @@ -135,10 +136,9 @@ def parse_demand_traces(

files = get_all_filepaths(input_directory)

with open(
with Path.open(
Path(__file__).parent.parent
/ Path("isp_trace_name_mapping_configs/demand_scenario_mapping.yaml"),
"r",
/ Path("isp_trace_name_mapping_configs/demand_scenario_mapping.yaml")
) as f:
demand_scenario_mapping = yaml.safe_load(f)

Expand Down Expand Up @@ -276,4 +276,4 @@ def extract_metadata_for_all_demand_files(
A dictionary with filepaths as keys and metadata dicts as values.
"""
file_metadata = [extract_demand_trace_metadata(str(f.name)) for f in filenames]
return dict(zip(filenames, file_metadata))
return dict(zip(filenames, file_metadata, strict=True))
56 changes: 28 additions & 28 deletions src/isp_trace_parser/get_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from pathlib import Path
from typing import List, Literal
from typing import Literal

import pandas as pd
import polars as pl
Expand Down Expand Up @@ -37,19 +37,20 @@ def _year_range_to_dt_range(
end_year, 7, 1
)

elif year_type == "calendar":
if year_type == "calendar":
return datetime.datetime(start_year, 1, 1), datetime.datetime(
end_year + 1, 1, 1
)
raise ValueError(year_type)


def _query_parquet_single_reference_year(
start_year: int,
end_year: int,
reference_year: int,
directory: str | Path,
filters: dict[str, any] = None,
select_columns: list[str] = None,
filters: dict[str, any] | None = None,
select_columns: list[str] | None = None,
year_type: Literal["fy", "calendar"] = "fy",
) -> pd.DataFrame:
"""
Expand Down Expand Up @@ -103,14 +104,14 @@ def _query_parquet_single_reference_year(
# Otherwise select all columns
columns_to_select = df_lazy.columns

df = (
dframe = (
df_lazy.filter(filter_expr)
.select(*columns_to_select)
.sort("datetime")
.collect()
)

return df.to_pandas()
return dframe.to_pandas()


def _query_parquet_multiple_reference_years(
Expand All @@ -136,19 +137,18 @@ def _query_parquet_multiple_reference_years(
start_year=year, end_year=year, reference_year=reference_year, **kwargs
)
)
data = pd.concat(data).reset_index(drop=True)
return data
return pd.concat(data).reset_index(drop=True)


@validate_call
def get_project_single_reference_year(
start_year: int,
end_year: int,
reference_year: int,
project: str | List,
project: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query project trace data for a single reference year.
Expand Down Expand Up @@ -237,11 +237,11 @@ def get_zone_single_reference_year(
start_year: int,
end_year: int,
reference_year: int,
zone: str | List,
resource_type: str | List,
zone: str | list,
resource_type: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query zone trace data for a single reference year.
Expand Down Expand Up @@ -333,13 +333,13 @@ def get_demand_single_reference_year(
start_year: int,
end_year: int,
reference_year: int,
scenario: str | List,
subregion: str | List,
demand_type: str | List,
poe: str | List,
scenario: str | list,
subregion: str | list,
demand_type: str | list,
poe: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query demand trace data for a single reference year.
Expand Down Expand Up @@ -441,10 +441,10 @@ def get_demand_single_reference_year(
@validate_call
def get_project_multiple_reference_years(
reference_year_mapping: dict[int, int],
project: str | List,
project: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query project trace data across multiple reference years.
Expand Down Expand Up @@ -530,11 +530,11 @@ def get_project_multiple_reference_years(
@validate_call
def get_zone_multiple_reference_years(
reference_year_mapping: dict[int, int],
zone: str | List,
resource_type: str | List,
zone: str | list,
resource_type: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query zone trace data across multiple reference years.
Expand Down Expand Up @@ -623,13 +623,13 @@ def get_zone_multiple_reference_years(
@validate_call
def get_demand_multiple_reference_years(
reference_year_mapping: dict[int, int],
scenario: str | List,
subregion: str | List,
demand_type: str | List,
poe: str | List,
scenario: str | list,
subregion: str | list,
demand_type: str | list,
poe: str | list,
directory: str | Path,
year_type: Literal["fy", "calendar"] = "fy",
select_columns: list[str] = None,
select_columns: list[str] | None = None,
):
"""
Query demand trace data across multiple reference years.
Expand Down
11 changes: 7 additions & 4 deletions src/isp_trace_parser/input_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
def input_directory(path: Path | str) -> Path:
    """Coerce *path* to a Path and verify it is an existing directory.

    Raises ValueError when the path cannot be converted or the directory
    does not exist.
    """
    path = is_valid_path(path)
    if not path.is_dir():
        msg = f"Directory {path} does not exist"
        raise ValueError(msg)
    return path


Expand All @@ -16,9 +17,11 @@ def is_valid_path(path: str | Path) -> Path:
try:
return Path(path)
except (TypeError, ValueError):
raise ValueError(f"Invalid parsed directory path: {path}")
msg = f"Invalid parsed directory path: {path}"
raise ValueError(msg) from None


def start_year_before_end_year(start_year, end_year) -> None:
    """Raise ValueError if *end_year* precedes *start_year*.

    Bug fix: the original message interpolated the two values swapped
    ("Start year {end_year} < end year {start_year}"), producing a
    misleading error.
    """
    if end_year < start_year:
        msg = f"Start year {start_year} is greater than end year {end_year}"
        raise ValueError(msg)
Loading