switch-model
diff --git a/‎docs/Pandas.md‎
Lines changed: 37 additions & 2 deletions b/‎docs/Pandas.md‎
Lines changed: 37 additions & 2 deletions
diff --git a/‎papers/Martin_Staadecker_Value_of_LDES_and_Factors/LDES_paper_graphs/Figure 1.py‎
Lines changed: 184 additions & 0 deletions b/‎papers/Martin_Staadecker_Value_of_LDES_and_Factors/LDES_paper_graphs/Figure 1.py‎
Lines changed: 184 additions & 0 deletions
@@ -108,6 +108,39 @@ where the columns over which we are merging are `key_1` and `key_2`.
 
 - `Series.unique()`: Returns a series where duplicate values are dropped.
 
+## Note on reading switch files
+
+When reading SWITCH csv files, it is recommended to use the following arguments in `pd.read_csv()`.
+
+- `index_col=False`: This stops Pandas from automatically using the
+  first column as an index, which ensures you are not using custom indexes
+  (see notes on custom indexes above).
+  
+- `dtype={"GENERATION_PROJECT": str}`: If all the generation project IDs happen to be
+  numbers, then Pandas will automatically set the `GENERATION_PROJECT` column type
+  to `int`. However, we don't want this since it may cause issues when dealing with
+  multiple dataframes, some of which have non-numeric IDs. (E.g. if you try merging
+  a DataFrame where `GENERATION_PROJECT` is an `int` with another where it's a `str`, it
+  won't work properly.)
+  
+- `dtype=str`: An even safer option than `dtype={"GENERATION_PROJECT": str}` is `dtype=str`.
+  This is particularly important when reading a file that will then be re-output with minimal changes.
+  Without this option, there's the risk of floating point values being slightly
+  modified (see [here](https://github.com/pandas-dev/pandas/issues/16452)) or integer columns
+  containing NA values (`.`) being ["promoted"](https://pandas.pydata.org/pandas-docs/stable/user_guide/gotchas.html?highlight=nan#na-type-promotions)
+  to floats. Note that with `dtype=str`, all columns are strings, so to do mathematical
+  computation on a column it will first need to be converted with `.astype()`.
+  
+- `na_values="."`: Switch uses full stops to indicate an unspecified value. We want Pandas
+  to interpret full stops as `NaN` rather than the string `.` so that the column type is
+  still properly interpreted rather than being detected as a string.
+  
+Combining these parameters, here is an example of how to read a switch file.
+
+```
+df = pd.read_csv("some_SWITCH_file.csv", index_col=False, dtype={"GENERATION_PROJECT": str}, na_values=".")
+```
+
 ## Example
 
 This example shows how we can use Pandas to generate a more useful view
@@ -117,9 +150,11 @@ of our generation plants from the SWITCH input files.
 import pandas as pd
 
 # READ
+# See note above on why we use these parameters
 kwargs = dict(
   index_col=False,
-  dtype={"GENERATION_PROJECT": str},  # This ensures that the project id column is read as a string not an int
+  dtype={"GENERATION_PROJECT": str},
+  na_values=".", 
 )
 gen_projects = pd.read_csv("generation_projects_info.csv", **kwargs)
 costs = pd.read_csv("gen_build_costs.csv", **kwargs)
@@ -138,7 +173,7 @@ gen_projects = gen_projects.merge(
 )
 
 # FILTER
-# When uncommented will filter out all the projects that aren't wind.
+# When uncommented, this line will filter out all the projects that aren't wind.
 # gen_projects = gen_projects[gen_projects["gen_energy_source"] == "Wind"]
 
 # WRITE
 
@@ -0,0 +1,184 @@
+# %%
+
+# Imports
+import pandas as pd
+from matplotlib.ticker import PercentFormatter
+
+from papers.Martin_Staadecker_Value_of_LDES_and_Factors.LDES_paper_graphs.util import (
+    get_scenario,
+    set_style,
+)
+from switch_model.tools.graph.main import GraphTools
+
+# Apply the plotting style provided by the paper's shared `util` module.
+set_style()
+
+# Prepare graph tools
+# Each scenario is loaded under a numeric name. The panels below label that
+# name as storage capacity in TWh (legend titles and x-axis labels).
+tools = GraphTools(
+    scenarios=[
+        get_scenario("1342", name=1.94),
+        get_scenario("M7", name=2),
+        get_scenario("M6", name=4),
+        get_scenario("M5", name=8),
+        get_scenario("M4", name=16),
+        get_scenario("M3", name=32),
+        get_scenario("M2", name=64),
+    ]
+)
+tools.pre_graphing(multi_scenario=True)
+
+# Specify formatting and get figure
+# The figure is a 2x2 grid; ax1..ax4 hold panels A..D, which are drawn by the
+# cells that follow.
+fig = tools.get_figure(size=(12, 12))
+ax1 = fig.add_subplot(2, 2, 1)
+ax2 = fig.add_subplot(2, 2, 2)
+ax3 = fig.add_subplot(2, 2, 3)
+ax4 = fig.add_subplot(2, 2, 4)
+
+# %%
+
+# Panel A: mean energy balance duals by hour of day, one line per scenario.
+ax = ax1
+# Reset the axis before drawing (presumably so the cell can be re-run).
+ax.clear()
+# Hide the major tick marks on all four sides of the axis.
+ax.tick_params(top=False, bottom=False, right=False, left=False, which="major")
+
+# Read only the columns needed and shorten the duals column name to "value".
+df = tools.get_dataframe(
+    "load_balance.csv",
+    usecols=[
+        "timestamp",
+        "normalized_energy_balance_duals_dollar_per_mwh",
+        "scenario_name",
+    ],
+).rename(columns={"normalized_energy_balance_duals_dollar_per_mwh": "value"})
+# df = df[df["scenario_name"] != "1.94"]
+# NOTE(review): relies on tools.transform.timestamp adding the "hour" column
+# used below; it is not one of the columns read from the CSV.
+df = tools.transform.timestamp(df)
+# Average the duals for every (scenario, hour of day) pair.
+df = df.groupby(["scenario_name", "hour"], as_index=False)["value"].mean()
+# Reshape to one row per hour and one column per scenario.
+df = df.pivot(index="hour", columns="scenario_name", values="value")
+# The column axis name becomes the legend title.
+df = df.rename_axis("Storage Capacity (TWh)", axis=1)
+# Repeat hour 0 as hour 24 so each line reaches the right edge of the
+# 0-24 x-range set below.
+df.loc[24] = df.loc[0]
+df *= 0.1  # Convert from $/MWh to cents/kWh
+df.plot(
+    ax=ax,
+    colormap="viridis",
+    xlabel="Time of Day (PST)",
+    marker=".",
+    ylabel="Normalized Duals (\xa2/kWh)",
+)
+ax.set_xlim(0, 24)
+# Start the y-axis at zero and leave 5% headroom above the largest value.
+ax.set_ylim(0, df.max().max() * 1.05)
+ax.set_title("A. Mean Energy Balance Duals by Time of Day")
+ax.set_xticks([0, 4, 8, 12, 16, 20, 24])
+# %%
+# Panel B: 7-day rolling mean of the energy balance duals over the year,
+# one line per scenario.
+ax = ax2
+# Reset the axis before drawing (presumably so the cell can be re-run).
+ax.clear()
+# Hide both major and minor tick marks on all four sides of the axis.
+ax.tick_params(top=False, bottom=False, right=False, left=False, which="both")
+
+# Read only the columns needed and shorten the duals column name to "value".
+df = tools.get_dataframe(
+    "load_balance.csv",
+    usecols=[
+        "timestamp",
+        "normalized_energy_balance_duals_dollar_per_mwh",
+        "scenario_name",
+    ],
+).rename(columns={"normalized_energy_balance_duals_dollar_per_mwh": "value"})
+# df = df[df["scenario_name"] != "1.94"]
+# Collapse to a single mean value per (scenario, timestamp).
+df = df.groupby(["scenario_name", "timestamp"], as_index=False).mean()
+# NOTE(review): relies on tools.transform.timestamp adding the "datetime"
+# column used as the index below; it is not read from the CSV.
+df = tools.transform.timestamp(df)
+# A datetime index is needed for the time-based ("7D") rolling window.
+df = df.set_index("datetime")
+# Centered 7-day rolling mean, computed separately for each scenario.
+df = (
+    df.groupby("scenario_name", as_index=False)
+    .rolling("7D", center=True)["value"]
+    .mean()
+)
+# Move scenarios into columns; the column axis name becomes the legend title.
+df = df.unstack("scenario_name").rename_axis("Storage Capacity (TWh)", axis=1)
+# Convert from $/MWh to cents/kWh
+df *= 0.1
+df.plot(
+    ax=ax,
+    colormap="viridis",
+    xlabel="Month of Year",
+    ylabel="Normalized Duals (\xa2/kWh)",
+)
+ax.set_title("B. Mean Energy Balance Duals Throughout the Year")
+# %%
+
+# Panel C: percent change in transmission and generation built, relative to
+# the first scenario, as a function of storage capacity.
+ax = ax3
+# Reset the axis before drawing (presumably so the cell can be re-run).
+ax.clear()
+# Hide both major and minor tick marks on all four sides of the axis.
+ax.tick_params(top=False, bottom=False, right=False, left=False, which="both")
+
+# Calculate transmission
+# Missing values are replaced by 0 before multiplying (presumably
+# convert_dot_to_na turns SWITCH's "." placeholders into NaN first).
+tx = tools.get_dataframe(
+    "transmission.csv",
+    usecols=["BuildTx", "trans_length_km", "scenario_name"],
+    convert_dot_to_na=True,
+).fillna(0)
+# Weight each line's build by its length, then total per scenario.
+tx["BuildTx"] *= tx["trans_length_km"]
+tx = tx.groupby("scenario_name")["BuildTx"].sum().rename("Transmission")
+
+# Get new buildout
+buildout = tools.get_dataframe("BuildGen.csv").rename(
+    columns={"GEN_BLD_YRS_1": "GENERATION_PROJECT"}
+)
+# Keep only latest year
+# NOTE(review): 2050 is hard-coded; confirm it is the last build year of
+# every scenario loaded above.
+buildout = buildout[buildout["GEN_BLD_YRS_2"] == 2050]
+# Merge with projects to get gen_type
+projects = tools.get_dataframe(
+    "generation_projects_info.csv",
+    from_inputs=True,
+    usecols=["GENERATION_PROJECT", "gen_tech", "gen_energy_source", "scenario_name"],
+)
+# validate="one_to_one" makes pandas raise if a (project, scenario) key is
+# duplicated on either side of the merge.
+buildout = buildout.merge(
+    projects,
+    on=["GENERATION_PROJECT", "scenario_name"],
+    validate="one_to_one",
+    how="left",
+)
+# The project table is no longer needed once merged.
+del projects
+# Adds the "gen_type" column that the storage filter below relies on.
+buildout = tools.transform.gen_type(buildout)
+# Filter out storage since it's not considered generation
+buildout = buildout[buildout["gen_type"] != "Storage"]
+# Sum across the entire scenario
+buildout = buildout.groupby("scenario_name")["BuildGen"].sum().rename("Generation")
+
+# Merge into same dataframe
+# Both series are indexed by scenario_name, so they align as two columns.
+df = pd.concat([tx, buildout], axis=1)
+
+# Convert to percent against baseline
+# NOTE(review): the baseline is whichever scenario ends up in the first row;
+# confirm the intended baseline scenario sorts first.
+df = (df / df.iloc[0] - 1) * 100
+
+# Plot
+df.plot(ax=ax, marker=".")
+ax.set_ylabel("Change in Capacity Built Compared to Baseline")
+ax.yaxis.set_major_formatter(PercentFormatter())
+ax.set_xlabel("WECC-wide Storage Capacity (TWh)")
+ax.set_title("C. Impact of Storage on Transmission & Generation Investments")
+# Only reductions are visible: any increase over the baseline (> 0%) falls
+# outside this y-range.
+ax.set_ylim(-100, 0)
+# %%
+
+# Panel D: yearly curtailment of renewable generation by technology, as a
+# function of storage capacity.
+ax = ax4
+# Reset the axis before drawing (presumably so the cell can be re-run).
+ax.clear()
+# Read dispatch.csv
+df = tools.get_dataframe(
+    "dispatch.csv",
+    usecols=[
+        "gen_tech",
+        "gen_energy_source",
+        "Curtailment_MW",
+        "is_renewable",
+        "tp_weight_in_year_hrs",
+        "scenario_name",
+    ],
+    na_filter=False,  # For performance
+)
+# Keep only renewable
+df = df[df["is_renewable"]]
+# Add the gen_type column
+df = tools.transform.gen_type(df)
+# Curtailed power (MW) times the hours each timepoint represents in a year
+# gives MWh per year; dividing by 1000 converts to GWh.
+df["value"] = df["Curtailment_MW"] * df["tp_weight_in_year_hrs"] / 1000
+# Total per scenario and technology, then reshape to one column per technology.
+df = df.groupby(["scenario_name", "gen_type"], as_index=False)["value"].sum()
+df = df.pivot(index="scenario_name", columns="gen_type", values="value")
+# Convert from GWh to TWh
+df /= 1000
+# The column axis name becomes the legend title.
+df = df.rename_axis("Technology", axis=1)
+df.plot(ax=ax, color=tools.get_colors(), marker=".")
+# The values were divided by 1000 twice starting from MWh, so they are in TWh.
+ax.set_ylabel("Yearly Curtailment (TWh)")
+ax.set_xlabel("WECC-wide Storage Capacity (TWh)")
+ax.set_title("D. Impact of Storage on Curtailment")
+# Hide the tick marks on all four sides of the axis.
+ax.tick_params(top=False, bottom=False, right=False, left=False)