Skip to content

Commit 7e22579

Browse files
authored
Merge pull request #101 from andrewdelman/ecco_access_v4r5
Added capability to access v4r5 output from ecco-model-granules S3 bucket
2 parents ae7b717 + e4c649c commit 7e22579

File tree

3 files changed

+351
-145
lines changed

3 files changed

+351
-145
lines changed

ecco_access/ecco_access.py

Lines changed: 77 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
4444
will query the native grid monthly SSH datasets, and all native grid
4545
monthly datasets with variables or descriptions matching 'THETA'.
4646
47-
version: ('v4r4'), specifies ECCO version to query
47+
version: ('v4r4','v4r5'), specifies ECCO version to query.
48+
Currently 'v4r5' only works with ['s3_open','s3_get','s3_get_ifspace'] modes,
49+
or if the files are already stored in download_root_dir/ShortName/.
50+
Otherwise an error is raised.
51+
'v4r5' only has grid='native' and time_res='monthly' data files available.
4852
4953
grid: ('native','latlon',None), specifies whether to query datasets with output
5054
on the native grid or the interpolated lat/lon grid.
@@ -120,7 +124,17 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
120124
121125
force_redownload: bool, if True, existing files will be redownloaded and replaced;
122126
if False (default), existing files will not be replaced.
123-
127+
128+
show_noredownload_msg: bool, if True (default), and force_redownload=False,
129+
display message for each file that is already
130+
downloaded (and therefore not re-downloaded);
131+
if False, these messages are not shown.
132+
133+
prompt_request_payer: bool, if True (default), user is prompted to approve
134+
(by entering "y" or "Y") any access to a
135+
requester-pays bucket; otherwise the request is canceled;
136+
if False, data access proceeds without prompting.
137+
124138
return_granules: bool, if True (default), str or list of queried or
125139
downloaded granules/files (including ones that
126140
were already on disk and not replaced) is returned.
@@ -139,23 +153,50 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
139153
140154
"""
141155

156+
142157
pass
143158

144159

145160
## query varlists as needed to obtain shortnames
146161

147-
def shortnames_find(query_list,grid,time_res):
162+
def shortnames_find(query_list,version,grid,time_res):
148163
shortnames_list = []
149-
for query_item in query_list:
150-
# see if the query is an existing NASA Earthdata ShortName
151-
# if not, then do a text search of the ECCO variable lists
152-
response = requests.get(url="https://cmr.earthdata.nasa.gov/search/collections.json",
153-
params={'ShortName':query_item})
154-
if len(response.json()['feed']['entry']) > 0:
155-
shortnames_list.append(query_item)
164+
if version == 'v4r5':
165+
if ((grid == 'native') or (grid is None)):
166+
if ((time_res == 'monthly') or (time_res == 'all')):
167+
import s3fs
168+
from os.path import split
169+
s3 = s3fs.S3FileSystem(anon=False,\
170+
requester_pays=True)
171+
s3_datasets_list = [split(dataset_path)[-1]\
172+
for dataset_path in \
173+
s3.ls("s3://ecco-model-granules/netcdf/V4r5/native/mon_mean/")]
174+
else:
175+
raise ValueError("'"+time_res+"' time res can not currently be accessed for v4r5.\n"\
176+
+"ecco_access can currently access only 'monthly' time_res v4r5 netCDF files.")
156177
else:
157-
shortname_match = ecco_podaac_varlist_query(query_item,version,grid,time_res)
158-
shortnames_list.append(shortname_match)
178+
raise ValueError("'"+grid+"' grid can not currently be accessed for v4r5.\n"\
179+
+"ecco_access can currently access only 'native' grid v4r5 netCDF files.")
180+
181+
for query_item in query_list:
182+
if version == 'v4r5':
183+
# see if the query is an existing dataset ID
184+
if query_item not in s3_datasets_list:
185+
raise ValueError("'"+query_item+"' is not a v4r5 dataset ID.\n"\
186+
+"Please query using the following dataset IDs:\n"\
187+
+str(s3_datasets_list))
188+
else:
189+
shortnames_list.append(query_item)
190+
else:
191+
# see if the query is an existing NASA Earthdata ShortName
192+
# if not, then do a text search of the ECCO variable lists
193+
response = requests.get(url="https://cmr.earthdata.nasa.gov/search/collections.json",
194+
params={'ShortName':query_item})
195+
if len(response.json()['feed']['entry']) > 0:
196+
shortnames_list.append(query_item)
197+
else:
198+
shortname_match = ecco_podaac_varlist_query(query_item,version,grid,time_res)
199+
shortnames_list.append(shortname_match)
159200

160201
return shortnames_list
161202

@@ -173,10 +214,11 @@ def shortnames_find(query_list,grid,time_res):
173214
if isinstance(curr_query,str):
174215
curr_query = [curr_query]
175216
shortnames += shortnames_find(curr_query,\
217+
version,\
176218
grid=curr_grid,\
177219
time_res=curr_time_res)
178220
else:
179-
shortnames = shortnames_find(query,grid=grid,time_res=time_res)
221+
shortnames = shortnames_find(query,version,grid=grid,time_res=time_res)
180222

181223

182224
## query NASA Earthdata CMR and download granules
@@ -207,11 +249,11 @@ def shortnames_find(query_list,grid,time_res):
207249
kwargs['max_avail_frac'] = 0.5
208250
if mode == 'download_ifspace':
209251
granule_files = ecco_podaac_download_diskaware(\
210-
shortnames,StartDate,EndDate,snapshot_interval,\
252+
shortnames,StartDate,EndDate,version,snapshot_interval,\
211253
download_root_dir=download_root_dir,**kwargs)
212254
elif mode == 's3_get_ifspace':
213255
granule_files = ecco_podaac_s3_get_diskaware(\
214-
shortnames,StartDate,EndDate,snapshot_interval,\
256+
shortnames,StartDate,EndDate,version,snapshot_interval,\
215257
download_root_dir=download_root_dir,**kwargs)
216258
else:
217259
raise ValueError('Invalid mode specified; please specify one of the following:'\
@@ -232,15 +274,15 @@ def shortnames_find(query_list,grid,time_res):
232274
for shortname in shortnames:
233275

234276
if mode in ['ls','query']:
235-
urls,sizes = ecco_podaac_query(shortname,StartDate,EndDate,snapshot_interval)
277+
urls,sizes = ecco_podaac_query(shortname,StartDate,EndDate,version,snapshot_interval)
236278
granule_files[shortname] = urls
237279
elif mode in ['s3_ls','s3_query']:
238280
granule_files[shortname] = ecco_podaac_s3_query(\
239-
shortname,StartDate,EndDate,snapshot_interval)
281+
shortname,StartDate,EndDate,version,snapshot_interval)
240282
elif mode == 'download':
241283
kwargs['return_downloaded_files'] = True
242284
granule_files[shortname] = ecco_podaac_download(\
243-
shortname,StartDate,EndDate,snapshot_interval,\
285+
shortname,StartDate,EndDate,version,snapshot_interval,\
244286
download_root_dir=download_root_dir,\
245287
**kwargs)
246288
elif mode == 'download_subset':
@@ -253,15 +295,15 @@ def shortnames_find(query_list,grid,time_res):
253295
**kwargs)
254296
elif mode == 's3_open':
255297
granule_files[shortname] = ecco_podaac_s3_open(\
256-
shortname,StartDate,EndDate,snapshot_interval)
298+
shortname,StartDate,EndDate,version,snapshot_interval)
257299
elif mode == 's3_open_fsspec':
258300
# granule_files will consist of mapper objects rather than URL/path or file lists
259301
granule_files[shortname] = ecco_podaac_s3_open_fsspec(\
260302
shortname,**kwargs)
261303
elif mode == 's3_get':
262304
kwargs['return_downloaded_files'] = True
263305
granule_files[shortname] = ecco_podaac_s3_get(\
264-
shortname,StartDate,EndDate,snapshot_interval,\
306+
shortname,StartDate,EndDate,version,snapshot_interval,\
265307
download_root_dir=download_root_dir,\
266308
**kwargs)
267309
else:
@@ -314,7 +356,11 @@ def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\
314356
will query the native grid monthly SSH datasets, and all native grid
315357
monthly datasets with variables or descriptions matching 'THETA'.
316358
317-
version: ('v4r4'), specifies ECCO version to query
359+
version: ('v4r4','v4r5'), specifies ECCO version to query.
360+
Currently 'v4r5' only works with ['s3_open','s3_get','s3_get_ifspace'] modes,
361+
or if the files are already stored in download_root_dir/ShortName/.
362+
Otherwise an error is raised.
363+
'v4r5' only has grid='native' and time_res='monthly' data files available.
318364
319365
grid: ('native','latlon',None), specifies whether to query datasets with output
320366
on the native grid or the interpolated lat/lon grid.
@@ -386,6 +432,16 @@ def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\
386432
387433
force_redownload: bool, if True, existing files will be redownloaded and replaced;
388434
if False (default), existing files will not be replaced.
435+
436+
show_noredownload_msg: bool, if True (default), and force_redownload=False,
437+
display message for each file that is already
438+
downloaded (and therefore not re-downloaded);
439+
if False, these messages are not shown.
440+
441+
prompt_request_payer: bool, if True (default), user is prompted to approve
442+
(by entering "y" or "Y") any access to a
443+
requester-pays bucket; otherwise the request is canceled;
444+
if False, data access proceeds without prompting.
389445
390446
Returns
391447
-------

0 commit comments

Comments
 (0)