Commit 2e47197
Merge pull request #98 from andrewdelman/ecco_access_updates
Final (substantial) round of tutorial updates to include use of ecco_access
2 parents fd05d30 + 1ffb914 commit 2e47197

14 files changed: +558, -527 lines changed

Tutorials_as_Jupyter_Notebooks/ECCO_v4_Coordinates_and_Dimensions_of_ECCOv4_NetCDF_files.ipynb

Lines changed: 59 additions & 70 deletions
@@ -16,7 +16,7 @@
 "\n",
 "Before you begin, make sure you have the monthly mean temperature/salinity files for 2010 downloaded. If you have done the previous tutorial about Dataset and DataArray objects, you already have these; if not, you should run at least the first code cell of that tutorial before this one.\n",
 "\n",
-"As we showed in the previous tutorial, we can use the `open_mfdataset` method from `xarray` to load multiple NetCDF files into Python as a `Dataset` object. `open_mfdataset` is very convenient because it automatically parses and concatenates NetCDF files, constructing a `Dataset` object using all of the dimensions, coordinates, variables, and metadata information. \n",
+"As we showed in the previous tutorial, we can use the [`ecco_access` library](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_access_intro.html#Setting-up-ecco_access), which downloads (or, in the AWS Cloud, retrieves) the requested ECCO output. We can either use the `ecco_podaac_access` function and then explicitly call `xarray`'s `open_mfdataset` to load multiple NetCDF files into Python as a `Dataset` object, or combine both steps into one with `ecco_access`'s `ecco_podaac_to_xrdataset`. This is very convenient because it opens the requested output as an `xarray` `Dataset` object with all of the dimensions, coordinates, variables, and metadata information. \n",
 "\n",
 "In the last tutorial we analyzed the contents of the ECCOv4 monthly mean potential temperature and salinity files for the year 2010. Let's load these files up again as the `Dataset` object *theta_dataset*."
 ]
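
For readers skimming the diff, the two access paths described in the new text look roughly like this (a minimal sketch assembled from the calls added later in this commit; the date range, download directory, and mode are illustrative, and an installed `ecco_access` with Earthdata credentials is assumed):

    from os.path import join, expanduser
    import xarray as xr
    import ecco_access as ea

    ECCO_dir = join(expanduser('~'), 'Downloads', 'ECCO_V4r4_PODAAC')
    ShortName = "ECCO_L4_TEMP_SALINITY_LLC0090GRID_MONTHLY_V4R4"

    # Method 1: download/retrieve the granules, then open them explicitly with xarray
    files_dict = ea.ecco_podaac_access(ShortName,
                                       StartDate='2010-01', EndDate='2010-12',
                                       mode='download_ifspace',
                                       download_root_dir=ECCO_dir,
                                       max_avail_frac=0.5)
    theta_dataset = xr.open_mfdataset(files_dict[ShortName], parallel=True,
                                      data_vars='minimal', coords='minimal',
                                      compat='override')

    # Method 2: download/retrieve and open as an xarray Dataset in one call
    theta_dataset = ea.ecco_podaac_to_xrdataset(ShortName,
                                                StartDate='2010-01', EndDate='2010-12',
                                                mode='download_ifspace',
                                                download_root_dir=ECCO_dir,
                                                max_avail_frac=0.5)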
@@ -32,7 +32,22 @@
 "import sys\n",
 "import matplotlib.pyplot as plt\n",
 "%matplotlib inline\n",
-"import json"
+"import json\n",
+"\n",
+"import ecco_access as ea\n",
+"\n",
+"# indicate mode of access\n",
+"# options are:\n",
+"# 'download': direct download from internet to your local machine\n",
+"# 'download_ifspace': like download, but only proceeds \n",
+"#                      if your machine has sufficient storage\n",
+"# 's3_open': access datasets in-cloud from an AWS instance\n",
+"# 's3_open_fsspec': use jsons generated with fsspec and \n",
+"#                    kerchunk libraries to speed up in-cloud access\n",
+"# 's3_get': direct download from S3 in-cloud to an AWS instance\n",
+"# 's3_get_ifspace': like s3_get, but only proceeds if your instance \n",
+"#                    has sufficient storage\n",
+"access_mode = 'download_ifspace'"
 ]
 },
 {
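
The `access_mode` string set in this new cell is simply forwarded as the `mode` argument of the `ecco_access` calls later in the notebook, so switching between a local and an in-cloud workflow is a one-line change (a sketch using only the mode names listed in the comment above):

    # on a local machine: download granules, but only if disk space allows
    access_mode = 'download_ifspace'

    # on an AWS instance, the same notebook can open the datasets directly from S3
    # by changing just this string, e.g.:
    # access_mode = 's3_open'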
@@ -65,37 +80,39 @@
 "## ================\n",
 "\n",
 "\n",
-"# indicate whether you are working in a cloud instance (True if yes, False otherwise)\n",
-"incloud_access = False\n",
+"## Set top-level file directory for the ECCO NetCDF files\n",
+"## =================================================================\n",
 "\n",
-"\n",
-"## change ECCO_dir as needed\n",
+"## currently set to /Downloads/ECCO_V4r4_PODAAC\n",
 "ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')\n",
 "\n",
+"# # for access_mode = 's3_open_fsspec', need to specify the root directory \n",
+"# # containing the jsons\n",
+"# jsons_root_dir = join('/efs_ecco','mzz-jsons')\n",
 "\n",
-"ShortNames_list = [\"ECCO_L4_TEMP_SALINITY_LLC0090GRID_MONTHLY_V4R4\"]\n",
-"if incloud_access == True:\n",
-"    from ecco_s3_retrieve import ecco_podaac_s3_get_diskaware\n",
 "\n",
-"    # retrieve files (download to instance if there is sufficient storage)\n",
-"    files_dict = ecco_podaac_s3_get_diskaware(ShortNames=ShortNames_list,\\\n",
-"                                              StartDate='2010-01',EndDate='2010-12',\\\n",
-"                                              max_avail_frac=0.5,\\\n",
-"                                              download_root_dir=ECCO_dir)\n",
+"ShortName = \"ECCO_L4_TEMP_SALINITY_LLC0090GRID_MONTHLY_V4R4\"\n",
+"\n",
+"# # Method 1: use ecco_podaac_access\n",
+"# # \n",
+"# # retrieve files\n",
+"# files_dict = ea.ecco_podaac_access(ShortName,\\\n",
+"#                                    StartDate='2010-01',EndDate='2010-12',\\\n",
+"#                                    mode=access_mode,\\\n",
+"#                                    download_root_dir=ECCO_dir,\\\n",
+"#                                    max_avail_frac=0.5)\n",
+"# # load file into workspace\n",
+"# theta_dataset = xr.open_mfdataset(files_dict[ShortName],parallel=True,\\\n",
+"#                                   data_vars='minimal',coords='minimal',compat='override')\n",
+"\n",
+"\n",
+"# # Method 2: use ecco_podaac_to_xrdataset\n",
 "\n",
-"    # load file into workspace\n",
-"    theta_dataset = xr.open_mfdataset(files_dict[ShortNames_list[0]],parallel=True,\\\n",
-"                                      data_vars='minimal',coords='minimal',compat='override') \n",
-"else:\n",
-"    curr_dir = join(ECCO_dir,ShortNames_list[0])\n",
-"    import glob\n",
-"    \n",
-"    # find files on disk (assumes that they were downloaded in the last tutorial)\n",
-"    files_to_load = list(glob.glob(join(curr_dir,'*2010*nc')))\n",
-"    \n",
-"    # load file into workspace\n",
-"    theta_dataset = xr.open_mfdataset(files_to_load, parallel=True,\\\n",
-"                                      data_vars='minimal',coords='minimal',compat='override')"
+"theta_dataset = ea.ecco_podaac_to_xrdataset(ShortName,\\\n",
+"                                            StartDate='2010-01',EndDate='2010-12',\\\n",
+"                                            mode=access_mode,\\\n",
+"                                            download_root_dir=ECCO_dir,\\\n",
+"                                            max_avail_frac=0.5)"
 ]
 },
 {
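
Whichever method is used, the result is an ordinary `xarray` Dataset, so the dimensions and coordinates examined in the rest of this tutorial can be listed immediately (a short usage sketch; assumes `theta_dataset` from the cell above):

    # the Dataset is opened lazily, but its dimensions, coordinates, and
    # data variables are available without reading the full arrays into memory
    print(theta_dataset.dims)
    print(list(theta_dataset.coords))
    print(list(theta_dataset.data_vars))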
@@ -4205,7 +4222,7 @@
 "source": [
 "## All ECCOv4 coordinates\n",
 "\n",
-"Now that we have been oriented to the dimensions and coordinates used by ECCOv4, let's examine a ``Dataset`` that uses all of them, an ECCOv4r4 NetCDF grid file. The grid file for the native LLC90 grid has ShortName **ECCO_L4_GEOMETRY_LLC0090GRID_V4R4**. It does not have time dimensions, but we can put any StartDate and EndDate between 1992-01-01 and 2018-01-01 into the `ecco_podaac_download` function (or `ecco_podaac_s3_get_diskaware` function in the cloud) and it should retrieve the file. Then the file can be opened; in this case we'll use `open_dataset` that loads a single file into memory in our workspace."
+"Now that we have been oriented to the dimensions and coordinates used by ECCOv4, let's examine a ``Dataset`` that uses all of them, an ECCOv4r4 NetCDF grid file. The grid file for the native LLC90 grid has ShortName **ECCO_L4_GEOMETRY_LLC0090GRID_V4R4**. It does not have time dimensions, so we do not need to specify a StartDate or EndDate, though if they are specified any dates in the ECCOv4 data range (1992-2017) should work. The function `ecco_podaac_to_xrdataset` uses *lazy* opening of the dataset (`open_mfdataset`) by default so it is not loaded into memory; if we want to load the data into our workspace memory we can append `.compute()`."
 ]
 },
 {
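
A minimal illustration of the lazy-versus-loaded distinction described above (a sketch; `ea`, `access_mode`, and `ECCO_dir` are assumed to be defined as earlier in the notebook, and `.compute()` is the standard xarray/dask call):

    ShortName = "ECCO_L4_GEOMETRY_LLC0090GRID_V4R4"

    # lazy: variables are dask-backed; nothing is read into memory yet
    grid_lazy = ea.ecco_podaac_to_xrdataset(ShortName,
                                            mode=access_mode,
                                            download_root_dir=ECCO_dir,
                                            max_avail_frac=0.5)

    # load all variables into workspace memory when actually needed
    grid_in_memory = grid_lazy.compute()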
@@ -4251,30 +4268,13 @@
 "source": [
 "## download file containing grid parameters and load into workspace\n",
 "\n",
-"ShortNames_list = [\"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\"]\n",
-"if incloud_access == True:\n",
-"    from ecco_s3_retrieve import ecco_podaac_s3_get_diskaware\n",
-"\n",
-"    # retrieve file (download to instance if there is sufficient storage)\n",
-"    files_dict = ecco_podaac_s3_get_diskaware(ShortNames=ShortNames_list,\\\n",
-"                                              StartDate='2010-01',EndDate='2010-12',\\\n",
-"                                              max_avail_frac=0.5,\\\n",
-"                                              download_root_dir=ECCO_dir)\n",
-"\n",
-"    # load file into workspace\n",
-"    grid_dataset = xr.open_dataset(files_dict[ShortNames_list[0]])\n",
-"    \n",
-"else:\n",
-"    from ecco_download import ecco_podaac_download\n",
+"ShortName = \"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\"\n",
 "\n",
-"    # download grid file\n",
-"    file_to_load = ecco_podaac_download(ShortName=grid_params_shortname,\\\n",
-"                                        StartDate=\"2010-01-01\",EndDate=\"2010-01-01\",\\\n",
-"                                        download_root_dir=ECCO_dir,n_workers=6,force_redownload=False,\\\n",
-"                                        return_downloaded_files=True)\n",
-"    \n",
-"    # load file into workspace\n",
-"    grid_dataset = xr.open_dataset(file_to_load)"
+"# retrieve file (download to instance if there is sufficient storage)\n",
+"grid_dataset = ea.ecco_podaac_to_xrdataset(ShortName,\\\n",
+"                                           mode=access_mode,\\\n",
+"                                           download_root_dir=ECCO_dir,\\\n",
+"                                           max_avail_frac=0.5).compute()"
 ]
 },
 {
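
After the `.compute()` above, the grid variables are plain in-memory arrays, which is easy to verify (a brief check; `XC`, the grid-cell-center longitude, is one of the coordinates discussed in this tutorial):

    # values are now numpy arrays rather than lazy dask arrays
    print(type(grid_dataset.XC.values))    # <class 'numpy.ndarray'>
    print(grid_dataset.XC.dims)            # ('tile', 'j', 'i') on the native llc90 grid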
@@ -5387,25 +5387,14 @@
 "source": [
 "## download file containing monthly mean ocean velocities for March 2010, and load into workspace\n",
 "\n",
-"ShortNames_list = [\"ECCO_L4_OCEAN_VEL_LLC0090GRID_MONTHLY_V4R4\"]\n",
-"if incloud_access == True:\n",
-"    # retrieve file (download to instance if there is sufficient storage)\n",
-"    files_dict = ecco_podaac_s3_get_diskaware(ShortNames=ShortNames_list,\\\n",
-"                                              StartDate='2010-03',EndDate='2010-03',\\\n",
-"                                              max_avail_frac=0.5,\\\n",
-"                                              download_root_dir=ECCO_dir)\n",
+"ShortName = \"ECCO_L4_OCEAN_VEL_LLC0090GRID_MONTHLY_V4R4\"\n",
 "\n",
-"    # load file into workspace\n",
-"    vel_dataset = xr.open_dataset(files_dict[ShortNames_list[0]])\n",
-"    \n",
-"else:\n",
-"    # download velocity file\n",
-"    file_to_load = ecco_podaac_download(ShortName=vel_shortname,\\\n",
-"                                        StartDate='2010-03',EndDate='2010-03',\\\n",
-"                                        download_root_dir=ECCO_dir,n_workers=6,force_redownload=False,re)\n",
-"    \n",
-"    # load file into workspace\n",
-"    vel_dataset = xr.open_dataset(file_to_load)"
+"# retrieve file (download to instance if there is sufficient storage)\n",
+"vel_dataset = ea.ecco_podaac_to_xrdataset(ShortName,\\\n",
+"                                          StartDate='2010-03',EndDate='2010-03',\\\n",
+"                                          mode=access_mode,\\\n",
+"                                          download_root_dir=ECCO_dir,\\\n",
+"                                          max_avail_frac=0.5).compute()"
 ]
 },
 {
@@ -6211,7 +6200,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.8"
+"version": "3.11.9"
 }
 },
 "nbformat": 4,

Tutorials_as_Jupyter_Notebooks/ECCO_v4_Loading_LLC_compact_binary_files.ipynb

Lines changed: 21 additions & 18 deletions
@@ -10,7 +10,7 @@
 "\n",
 "To teach how to load ECCO binary files written by the MITgcm in the llc 'compact' format. \n",
 "\n",
-"## Introduciton\n",
+"## Introduction\n",
 "\n",
 "When the MITgcm saves diagnostic and other fields to files it does so using the so-called 'compact' format. The compact format distributes the arrays from the 13 lat-lon-cap tiles in a somewhat unintuitive manner. Fortunately, it is not difficult to extract the 13 tiles from 'compact' format files. This tutorial will show you how to use the 'read_llc_to_tiles' subroutine to read and re-organize MITgcm's files written in compact format into a more familiar 13-tile layout."
 ]
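
Since this introduction centers on `read_llc_to_tiles`, here is the general calling pattern (a hedged sketch: the directory and file name are placeholders, and the keyword arguments follow the `ecco_v4_py` conventions used later in this notebook):

    import ecco_v4_py as ecco

    # read one 2D llc90 field stored in 'compact' format and reorganize it
    # into the 13 lat-lon-cap tiles; the result is shaped (13, 90, 90)
    data_tiles = ecco.read_llc_to_tiles('/path/to/binary/files', 'example_field.data',
                                        llc=90, nk=1)
    print(data_tiles.shape)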
@@ -192,9 +192,7 @@
 {
 "cell_type": "code",
 "execution_count": 4,
-"metadata": {
-"scrolled": false
-},
+"metadata": {},
 "outputs": [
 {
 "name": "stdout",
@@ -231,9 +229,7 @@
 {
 "cell_type": "code",
 "execution_count": 5,
-"metadata": {
-"scrolled": false
-},
+"metadata": {},
 "outputs": [
 {
 "data": {
@@ -268,8 +264,19 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"ecco_grid_dir = '/Users/ifenty/tmp/nctiles_grid/'\n",
-"ecco_grid = ecco.load_ecco_grid_nc(input_dir, 'ECCO-GRID.nc')"
+"from os.path import join,expanduser\n",
+"import ecco_access as ea\n",
+"\n",
+"access_mode = 'download_ifspace'\n",
+"\n",
+"## download directory for grid file, currently set to /Downloads/ECCO_V4r4_PODAAC\n",
+"user_home_dir = expanduser('~')\n",
+"ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')\n",
+"\n",
+"ecco_grid = ea.ecco_podaac_to_xrdataset(\"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\",\\\n",
+"                                        mode=access_mode,\\\n",
+"                                        download_root_dir=ECCO_dir,\\\n",
+"                                        max_avail_frac=0.5)"
 ]
 },
 {
@@ -509,9 +516,7 @@
 {
 "cell_type": "code",
 "execution_count": 17,
-"metadata": {
-"scrolled": false
-},
+"metadata": {},
 "outputs": [
 {
 "data": {
@@ -656,9 +661,7 @@
 {
 "cell_type": "code",
 "execution_count": 23,
-"metadata": {
-"scrolled": false
-},
+"metadata": {},
 "outputs": [
 {
 "name": "stdout",
@@ -978,7 +981,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3",
+"display_name": "Python 3 (ipykernel)",
 "language": "python",
 "name": "python3"
 },
@@ -992,9 +995,9 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.8.10"
+"version": "3.11.9"
 }
 },
 "nbformat": 4,
-"nbformat_minor": 2
+"nbformat_minor": 4
 }

Tutorials_as_Jupyter_Notebooks/ECCO_v4_Loading_the_ECCOv4_native_model_grid_parameters.ipynb

Lines changed: 43 additions & 17 deletions
@@ -12,7 +12,7 @@
 "\n",
 "## Introduction\n",
 "\n",
-"The ECCOv4 model grid parameters are provided as a single NetCDF file. It can be downloaded using the `ecco_podaac_download` function as described in the [download tutorial](https://ecco-v4-python-tutorial.readthedocs.io/Downloading_ECCO_Datasets_from_PODAAC_Python.html#ECCO_download-module:-the-quick-and-easy-method). The ShortName for the dataset is **ECCO_L4_GEOMETRY_LLC0090GRID_V4R4**. The grid parameters file has no time dimension, but `ecco_podaac_download` requires a StartDate and EndDate to be specified; any date in the range 1992-2017 can be used."
+"The ECCOv4 model grid parameters are provided as a single NetCDF file. It can be downloaded using the `ecco_access` library, if you have set up the library [in your Python path](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_access_intro.html#Setting-up-ecco_access). The ShortName for the dataset is **ECCO_L4_GEOMETRY_LLC0090GRID_V4R4**."
 ]
 },
 {
@@ -38,9 +38,20 @@
 "import matplotlib.pyplot as plt\n",
 "%matplotlib inline\n",
 "\n",
+"import ecco_access as ea\n",
 "\n",
-"# indicate whether you are working in a cloud instance (True if yes, False otherwise)\n",
-"incloud_access = False"
+"# indicate mode of access\n",
+"# options are:\n",
+"# 'download': direct download from internet to your local machine\n",
+"# 'download_ifspace': like download, but only proceeds \n",
+"#                      if your machine has sufficient storage\n",
+"# 's3_open': access datasets in-cloud from an AWS instance\n",
+"# 's3_open_fsspec': use jsons generated with fsspec and \n",
+"#                    kerchunk libraries to speed up in-cloud access\n",
+"# 's3_get': direct download from S3 in-cloud to an AWS instance\n",
+"# 's3_get_ifspace': like s3_get, but only proceeds if your instance \n",
+"#                    has sufficient storage\n",
+"access_mode = 'download_ifspace'"
 ]
 },
 {
@@ -70,9 +81,14 @@
 "outputs": [],
 "source": [
 "## Set top-level file directory for the ECCO NetCDF files\n",
+"## =================================================================\n",
 "\n",
-"## change ECCO_dir as needed\n",
-"ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')"
+"## currently set to /Downloads/ECCO_V4r4_PODAAC\n",
+"ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')\n",
+"\n",
+"# # for access_mode = 's3_open_fsspec', need to specify the root directory \n",
+"# # containing the jsons\n",
+"# jsons_root_dir = join('/efs_ecco','mzz-jsons')"
 ]
 },
 {
@@ -83,17 +99,27 @@
 "source": [
 "## open ECCO grid dataset\n",
 "\n",
-"ShortNames_list = [\"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\"]\n",
-"if incloud_access == True:\n",
-"    from ecco_s3_retrieve import ecco_podaac_s3_get_diskaware\n",
-"    files_dict = ecco_podaac_s3_get_diskaware(ShortNames=ShortNames_list,\\\n",
-"                                              StartDate='1992-01',EndDate='2017-12',\\\n",
-"                                              max_avail_frac=0.5,\\\n",
-"                                              download_root_dir=ECCO_dir)\n",
-"    grid = xr.open_mfdataset(files_dict[ShortNames_list[0]])\n",
-"else:\n",
-"    grid_params_file = \"GRID_GEOMETRY_ECCO_V4r4_native_llc0090.nc\"\n",
-"    grid = xr.open_mfdataset(join(ECCO_dir,ShortNames_list[0],grid_params_file))"
+"ShortName = \"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\"\n",
+"\n",
+"\n",
+"## there are 2 ways to do this using ecco_access:\n",
+"## use ea.ecco_podaac_access to download (or retrieve) the file and return the file path and name\n",
+"## which is then opened using xr.open_dataset or xr.open_mfdataset\n",
+"\n",
+"# grid_file_dict = ea.ecco_podaac_access(ShortName,\\\n",
+"#                                        mode=access_mode,\\\n",
+"#                                        download_root_dir=ECCO_dir,\\\n",
+"#                                        max_avail_frac=0.5)\n",
+"# grid = xr.open_mfdataset(grid_file_dict[ShortName])\n",
+"\n",
+"\n",
+"## OR use ea.ecco_podaac_to_xrdataset, which combines both steps\n",
+"## using xr.open_mfdataset to \"lazy\" open the data files\n",
+"\n",
+"grid = ea.ecco_podaac_to_xrdataset(ShortName,\\\n",
+"                                   mode=access_mode,\\\n",
+"                                   download_root_dir=ECCO_dir,\\\n",
+"                                   max_avail_frac=0.5)"
 ]
 },
 {
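
As a quick follow-up to either approach, the lazily opened grid behaves like any other xarray Dataset, so a single lat-lon-cap tile can be selected before anything is read from disk (a usage sketch; assumes the `grid` object from the cell above and the `tile` dimension of the native llc90 grid):

    # pull out one of the 13 tiles by index; data are still not loaded into memory
    grid_tile2 = grid.isel(tile=2)
    print(grid_tile2.dims)

    # load this tile's grid parameters into memory only when needed
    grid_tile2 = grid_tile2.compute()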
@@ -9227,7 +9253,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.8"
+"version": "3.11.9"
 }
 },
 "nbformat": 4,
