Skip to content

Commit fd05d30

Browse files
authored
Merge pull request #97 from andrewdelman/ecco_access_updates
fix to ecco_access bug for download_ifspace mode, and ecco_access updates in tutorials
2 parents bb38cad + 5e8187e commit fd05d30

19 files changed

+4812
-819
lines changed

Tutorials_as_Jupyter_Notebooks/ECCO_v4_Accessing_and_Subsetting_Variables.ipynb

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@
2020
"\n",
2121
"Both methods work identically to access *Dimensions*, *Coordinates*, and *Data variables*. Accessing *Attribute* variables requires a slightly different approach as we will see.\n",
2222
"\n",
23-
"For this tutorial make sure that you have the 2010 monthly granules of the SSH and OBP datasets downloaded, with ShortNames:\n",
23+
"For this tutorial you will need the 2010 monthly granules of the SSH and OBP datasets downloaded, with ShortNames\n",
2424
"\n",
2525
"- **ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4**\n",
2626
"- **ECCO_L4_OBP_LLC0090GRID_MONTHLY_V4R4**\n",
2727
"\n",
28-
"You will also need the [grid file](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_v4_Loading_the_ECCOv4_native_model_grid_parameters.html) downloaded if you don't have it already.\n",
28+
"as well as the [grid file](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_v4_Loading_the_ECCOv4_native_model_grid_parameters.html). The `ecco_access` library used in the notebook will handle download or retrieval of the necessary data, if you have set up the library [in your Python path](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_access_intro.html#Setting-up-ecco_access).\n",
2929
"\n",
3030
"\n",
3131
"## Accessing fields inside `Dataset` and `DataArray` objects\n",
@@ -47,12 +47,23 @@
4747
"import json\n",
4848
"import glob\n",
4949
"\n",
50+
"import warnings\n",
51+
"warnings.filterwarnings('ignore')\n",
5052
"\n",
51-
"# indicate whether you are working in a cloud instance (True if yes, False otherwise)\n",
52-
"incloud_access = False\n",
53+
"import ecco_access as ea\n",
5354
"\n",
54-
"import warnings\n",
55-
"warnings.filterwarnings('ignore')"
55+
"# indicate mode of access\n",
56+
"# options are:\n",
57+
"# 'download': direct download from internet to your local machine\n",
58+
"# 'download_ifspace': like download, but only proceeds \n",
59+
"#                      if your machine has sufficient storage\n",
60+
"# 's3_open': access datasets in-cloud from an AWS instance\n",
61+
"# 's3_open_fsspec': use jsons generated with fsspec and \n",
62+
"# kerchunk libraries to speed up in-cloud access\n",
63+
"# 's3_get': direct download from S3 in-cloud to an AWS instance\n",
64+
"# 's3_get_ifspace': like s3_get, but only proceeds if your instance \n",
65+
"# has sufficient storage\n",
66+
"access_mode = 'download_ifspace'"
5667
]
5768
},
5869
{
@@ -85,9 +96,12 @@
8596
"## Set top-level file directory for the ECCO NetCDF files\n",
8697
"## =================================================================\n",
8798
"\n",
88-
"## currently set to ~/Downloads/ECCO_V4r4_PODAAC, \n",
89-
"## the default if ecco_podaac_download was used to download dataset granules\n",
90-
"ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')"
99+
"## currently set to ~/Downloads/ECCO_V4r4_PODAAC\n",
100+
"ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')\n",
101+
"\n",
102+
"# # for access_mode = 's3_open_fsspec', need to specify the root directory \n",
103+
"# # containing the jsons\n",
104+
"# jsons_root_dir = join('/efs_ecco','mzz-jsons')"
91105
]
92106
},
93107
{
@@ -101,24 +115,16 @@
101115
"ShortNames_list = [\"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\",\\\n",
102116
" \"ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4\",\\\n",
103117
" \"ECCO_L4_OBP_LLC0090GRID_MONTHLY_V4R4\"]\n",
104-
"if incloud_access == True:\n",
105-
" from ecco_s3_retrieve import ecco_podaac_s3_get_diskaware\n",
106-
" files_dict = ecco_podaac_s3_get_diskaware(ShortNames=ShortNames_list,\\\n",
118+
"\n",
119+
"ds_dict = ea.ecco_podaac_to_xrdataset(ShortNames_list,\\\n",
107120
" StartDate='2010-01',EndDate='2010-12',\\\n",
108-
" max_avail_frac=0.5,\\\n",
109-
" download_root_dir=ECCO_dir)\n",
110-
" ecco_grid = xr.open_dataset(files_dict[ShortNames_list[0]])\n",
111-
" ds_SSH = xr.open_mfdataset(files_dict[ShortNames_list[1]],\\\n",
112-
" compat='override',data_vars='minimal',coords='minimal')\n",
113-
" ds_OBP = xr.open_mfdataset(files_dict[ShortNames_list[2]],\\\n",
114-
" compat='override',data_vars='minimal',coords='minimal')\n",
115-
"else:\n",
116-
" ecco_grid = xr.open_dataset(glob.glob(join(ECCO_dir,ShortNames_list[0],'*.nc'))[0])\n",
117-
" ds_SSH = xr.open_mfdataset(glob.glob(join(ECCO_dir,ShortNames_list[1],'*2010-*.nc')),\\\n",
118-
" compat='override',data_vars='minimal',coords='minimal')\n",
119-
" ds_OBP = xr.open_mfdataset(glob.glob(join(ECCO_dir,ShortNames_list[2],'*2010-*.nc')),\\\n",
120-
" compat='override',data_vars='minimal',coords='minimal')\n",
121+
" mode=access_mode,\\\n",
122+
" download_root_dir=ECCO_dir,\\\n",
123+
" max_avail_frac=0.5)\n",
121124
"\n",
125+
"ecco_grid = ds_dict[ShortNames_list[0]]\n",
126+
"ds_SSH = ds_dict[ShortNames_list[1]]\n",
127+
"ds_OBP = ds_dict[ShortNames_list[2]]\n",
122128
"\n",
123129
"## Merge the grid and variable datasets to make the ecco_ds\n",
124130
"ecco_ds = xr.merge((ecco_grid,ds_SSH,ds_OBP)) "
@@ -3934,7 +3940,7 @@
39343940
"name": "python",
39353941
"nbconvert_exporter": "python",
39363942
"pygments_lexer": "ipython3",
3937-
"version": "3.11.8"
3943+
"version": "3.11.9"
39383944
}
39393945
},
39403946
"nbformat": 4,

Tutorials_as_Jupyter_Notebooks/ECCO_v4_Combining_Multiple_Datasets.ipynb

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@
1414
"\n",
1515
"In previous tutorials we've loaded single lat-lon-cap NetCDF tile files (granules) for ECCO state estimate variables and model grid parameters. Here we will demonstrate merging `Datasets` together. Some benefits of merging `Datasets` include having a tidier workspace and simplifying subsetting operations (e.g., using ``xarray.isel`` or ``xarray.sel`` as shown in the [previous tutorial](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_v4_Loading_the_ECCOv4_state_estimate_fields_on_the_native_model_grid.html)). \n",
1616
"\n",
17-
"First, we'll load three ECCOv4 NetCDF state estimate variables (each centered on different coordinates) as well as the model grid file. For this, you will need to download 2 datasets of monthly mean fields for the year 2010. The ShortNames for the 2 datasets are:\n",
17+
"First, we'll load three ECCOv4 NetCDF state estimate variables (each centered on different coordinates) as well as the model grid file. For this, you will need 2 datasets of monthly mean fields for the year 2010, as well as the grid parameters file. The ShortNames for the datasets are:\n",
1818
"\n",
19-
"- **ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4**\n",
20-
"- **ECCO_L4_OCEAN_3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY_V4R4**\n",
19+
"- **ECCO_L4_GEOMETRY_LLC0090GRID_V4R4** (no time dimension)\n",
20+
"- **ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4** (Jan-Dec 2010)\n",
21+
"- **ECCO_L4_OCEAN_3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY_V4R4** (Jan-Dec 2010)\n",
2122
"\n",
22-
"If you did the previous tutorial you already have the SSH files.\n",
23+
"The `ecco_access` library used in the notebook will handle download or retrieval of the necessary data, if you have set up the library [in your Python path](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_access_intro.html#Setting-up-ecco_access).\n",
2324
"\n",
24-
"Once you have the required ECCOv4 output downloaded, let's define our environment."
25+
"Let's define our environment:"
2526
]
2627
},
2728
{
@@ -36,9 +37,20 @@
3637
"import matplotlib.pyplot as plt\n",
3738
"import json\n",
3839
"\n",
40+
"import ecco_access as ea\n",
3941
"\n",
40-
"# indicate whether you are working in a cloud instance (True if yes, False otherwise)\n",
41-
"incloud_access = False"
42+
"# indicate mode of access\n",
43+
"# options are:\n",
44+
"# 'download': direct download from internet to your local machine\n",
45+
"# 'download_ifspace': like download, but only proceeds \n",
46+
"# if your machine have sufficient storage\n",
47+
"# 's3_open': access datasets in-cloud from an AWS instance\n",
48+
"# 's3_open_fsspec': use jsons generated with fsspec and \n",
49+
"# kerchunk libraries to speed up in-cloud access\n",
50+
"# 's3_get': direct download from S3 in-cloud to an AWS instance\n",
51+
"# 's3_get_ifspace': like s3_get, but only proceeds if your instance \n",
52+
"# has sufficient storage\n",
53+
"access_mode = 'download_ifspace'"
4254
]
4355
},
4456
{
@@ -72,9 +84,12 @@
7284
"## Set top-level file directory for the ECCO NetCDF files\n",
7385
"## =================================================================\n",
7486
"\n",
75-
"## currently set to ~/Downloads/ECCO_V4r4_PODAAC, \n",
76-
"## the default if ecco_podaac_download was used to download dataset granules\n",
77-
"ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')"
87+
"## currently set to ~/Downloads/ECCO_V4r4_PODAAC\n",
88+
"ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')\n",
89+
"\n",
90+
"# # for access_mode = 's3_open_fsspec', need to specify the root directory \n",
91+
"# # containing the jsons\n",
92+
"# jsons_root_dir = join('/efs_ecco','mzz-jsons')"
7893
]
7994
},
8095
{
@@ -83,17 +98,17 @@
8398
"metadata": {},
8499
"outputs": [],
85100
"source": [
86-
"## if working in the AWS cloud, access datasets needed for this tutorial\n",
101+
"## Access datasets needed for this tutorial\n",
87102
"\n",
88103
"ShortNames_list = [\"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\",\\\n",
89104
" \"ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4\",\\\n",
90105
" \"ECCO_L4_OCEAN_3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY_V4R4\"]\n",
91-
"if incloud_access == True:\n",
92-
" from ecco_s3_retrieve import ecco_podaac_s3_get_diskaware\n",
93-
" files_dict = ecco_podaac_s3_get_diskaware(ShortNames=ShortNames_list,\\\n",
106+
"\n",
107+
"ds_dict = ea.ecco_podaac_to_xrdataset(ShortNames_list,\\\n",
94108
" StartDate='2010-01',EndDate='2010-12',\\\n",
95-
" max_avail_frac=0.5,\\\n",
96-
" download_root_dir=ECCO_dir)"
109+
" mode=access_mode,\\\n",
110+
" download_root_dir=ECCO_dir,\\\n",
111+
" max_avail_frac=0.5)"
97112
]
98113
},
99114
{
@@ -110,12 +125,7 @@
110125
"outputs": [],
111126
"source": [
112127
"# load dataset containing monthly SSH in 2010\n",
113-
"if incloud_access == True:\n",
114-
" # use list comprehension to list file path(s)\n",
115-
" file_paths = [filepath for filepath in files_dict[ShortNames_list[1]] if '_2010-' in filepath]\n",
116-
" ecco_dataset_A = xr.open_mfdataset(file_paths)\n",
117-
"else:\n",
118-
" ecco_dataset_A = xr.open_mfdataset(join(ECCO_dir,'*SSH*MONTHLY*','*_2010-??_*.nc'))"
128+
"ecco_dataset_A = ds_dict[ShortNames_list[1]]"
119129
]
120130
},
121131
{
@@ -224,12 +234,8 @@
224234
}
225235
],
226236
"source": [
227-
"# load dataset containing monthly mean 3D temperature fluxes in 2010\n",
228-
"if incloud_access == True:\n",
229-
" file_paths = [filepath for filepath in files_dict[ShortNames_list[2]] if '_2010-' in filepath]\n",
230-
" ecco_dataset_B = xr.open_mfdataset(file_paths)\n",
231-
"else:\n",
232-
" ecco_dataset_B = xr.open_mfdataset(join(ECCO_dir,'*3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY*','*_2010-??_*.nc'))\n",
237+
"# open dataset containing monthly mean 3D temperature fluxes in 2010\n",
238+
"ecco_dataset_B = ds_dict[ShortNames_list[2]]\n",
233239
"\n",
234240
"ecco_dataset_B.data_vars"
235241
]
@@ -1568,7 +1574,7 @@
15681574
},
15691575
"outputs": [],
15701576
"source": [
1571-
"# merge together\n",
1577+
"# merge together and load into memory\n",
15721578
"ecco_dataset_AB = xr.merge([ecco_dataset_A['SSH'], ecco_dataset_B[['ADVx_TH','ADVy_TH']]]).compute()"
15731579
]
15741580
},
@@ -2499,11 +2505,8 @@
24992505
],
25002506
"source": [
25012507
"# Load the llc90 grid parameters\n",
2502-
"if incloud_access == True:\n",
2503-
" grid_dataset = xr.open_dataset(files_dict[ShortNames_list[0]])\n",
2504-
"else:\n",
2505-
" import glob\n",
2506-
" grid_dataset = xr.open_dataset(glob.glob(join(ECCO_dir,'*GEOMETRY*','*.nc'))[0])\n",
2508+
"grid_dataset = ds_dict[ShortNames_list[0]].compute()\n",
2509+
"\n",
25072510
"grid_dataset.coords"
25082511
]
25092512
},
@@ -3382,7 +3385,7 @@
33823385
"name": "python",
33833386
"nbconvert_exporter": "python",
33843387
"pygments_lexer": "ipython3",
3385-
"version": "3.11.8"
3388+
"version": "3.11.9"
33863389
}
33873390
},
33883391
"nbformat": 4,

0 commit comments

Comments
 (0)