Skip to content

Commit 47b0aad

Browse files
committed
ecco_access add capability to access v4r5 files from ecco-model-granules bucket
1 parent ae7b717 commit 47b0aad

File tree

3 files changed

+251
-119
lines changed

3 files changed

+251
-119
lines changed

ecco_access/ecco_access.py

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,11 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
4444
will query the native grid monthly SSH datasets, and all native grid
4545
monthly datasets with variables or descriptions matching 'THETA'.
4646
47-
version: ('v4r4'), specifies ECCO version to query
47+
version: ('v4r4','v4r5'), specifies ECCO version to query.
48+
Currently 'v4r5' only works with ['s3_open','s3_get','s3_get_ifspace'] modes,
49+
or if the files are already stored in download_root_dir/ShortName/.
50+
Otherwise an error is raised.
51+
'v4r5' only has grid='native' and time_res='monthly' data files available.
4852
4953
grid: ('native','latlon',None), specifies whether to query datasets with output
5054
on the native grid or the interpolated lat/lon grid.
@@ -139,23 +143,50 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
139143
140144
"""
141145

146+
142147
pass
143148

144149

145150
## query varlists as needed to obtain shortnames
146151

147-
def shortnames_find(query_list,grid,time_res):
152+
def shortnames_find(query_list,version,grid,time_res):
148153
shortnames_list = []
149-
for query_item in query_list:
150-
# see if the query is an existing NASA Earthdata ShortName
151-
# if not, then do a text search of the ECCO variable lists
152-
response = requests.get(url="https://cmr.earthdata.nasa.gov/search/collections.json",
153-
params={'ShortName':query_item})
154-
if len(response.json()['feed']['entry']) > 0:
155-
shortnames_list.append(query_item)
154+
if version == 'v4r5':
155+
if ((grid == 'native') or (grid is None)):
156+
if ((time_res == 'monthly') or (time_res == 'all')):
157+
import s3fs
158+
from os.path import split
159+
s3 = s3fs.S3FileSystem(anon=False,\
160+
requester_pays=True)
161+
s3_datasets_list = [split(dataset_path)[-1]\
162+
for dataset_path in \
163+
s3.ls("s3://ecco-model-granules/netcdf/V4r5/native/mon_mean/")]
164+
else:
165+
raise ValueError("'"+time_res+"' time res can not currently be accessed for v4r5.\n"\
166+
+"ecco_access can currently access only 'monthly' time_res v4r5 netCDF files.")
156167
else:
157-
shortname_match = ecco_podaac_varlist_query(query_item,version,grid,time_res)
158-
shortnames_list.append(shortname_match)
168+
raise ValueError("'"+grid+"' grid can not currently be accessed for v4r5.\n"\
169+
+"ecco_access can currently access only 'native' grid v4r5 netCDF files.")
170+
171+
for query_item in query_list:
172+
if version == 'v4r5':
173+
# see if the query is an existing dataset ID
174+
if query_item not in s3_datasets_list:
175+
raise ValueError("'"+query_item+"' is not a v4r5 dataset ID.\n"\
176+
+"Please query using the following dataset IDs:\n"\
177+
+str(s3_datasets_list))
178+
else:
179+
shortnames_list.append(query_item)
180+
else:
181+
# see if the query is an existing NASA Earthdata ShortName
182+
# if not, then do a text search of the ECCO variable lists
183+
response = requests.get(url="https://cmr.earthdata.nasa.gov/search/collections.json",
184+
params={'ShortName':query_item})
185+
if len(response.json()['feed']['entry']) > 0:
186+
shortnames_list.append(query_item)
187+
else:
188+
shortname_match = ecco_podaac_varlist_query(query_item,version,grid,time_res)
189+
shortnames_list.append(shortname_match)
159190

160191
return shortnames_list
161192

@@ -173,10 +204,11 @@ def shortnames_find(query_list,grid,time_res):
173204
if isinstance(curr_query,str):
174205
curr_query = [curr_query]
175206
shortnames += shortnames_find(curr_query,\
207+
version,\
176208
grid=curr_grid,\
177209
time_res=curr_time_res)
178210
else:
179-
shortnames = shortnames_find(query,grid=grid,time_res=time_res)
211+
shortnames = shortnames_find(query,version,grid=grid,time_res=time_res)
180212

181213

182214
## query NASA Earthdata CMR and download granules
@@ -207,11 +239,11 @@ def shortnames_find(query_list,grid,time_res):
207239
kwargs['max_avail_frac'] = 0.5
208240
if mode == 'download_ifspace':
209241
granule_files = ecco_podaac_download_diskaware(\
210-
shortnames,StartDate,EndDate,snapshot_interval,\
242+
shortnames,StartDate,EndDate,version,snapshot_interval,\
211243
download_root_dir=download_root_dir,**kwargs)
212244
elif mode == 's3_get_ifspace':
213245
granule_files = ecco_podaac_s3_get_diskaware(\
214-
shortnames,StartDate,EndDate,snapshot_interval,\
246+
shortnames,StartDate,EndDate,version,snapshot_interval,\
215247
download_root_dir=download_root_dir,**kwargs)
216248
else:
217249
raise ValueError('Invalid mode specified; please specify one of the following:'\
@@ -232,15 +264,15 @@ def shortnames_find(query_list,grid,time_res):
232264
for shortname in shortnames:
233265

234266
if mode in ['ls','query']:
235-
urls,sizes = ecco_podaac_query(shortname,StartDate,EndDate,snapshot_interval)
267+
urls,sizes = ecco_podaac_query(shortname,StartDate,EndDate,version,snapshot_interval)
236268
granule_files[shortname] = urls
237269
elif mode in ['s3_ls','s3_query']:
238270
granule_files[shortname] = ecco_podaac_s3_query(\
239-
shortname,StartDate,EndDate,snapshot_interval)
271+
shortname,StartDate,EndDate,version,snapshot_interval)
240272
elif mode == 'download':
241273
kwargs['return_downloaded_files'] = True
242274
granule_files[shortname] = ecco_podaac_download(\
243-
shortname,StartDate,EndDate,snapshot_interval,\
275+
shortname,StartDate,EndDate,version,snapshot_interval,\
244276
download_root_dir=download_root_dir,\
245277
**kwargs)
246278
elif mode == 'download_subset':
@@ -253,15 +285,15 @@ def shortnames_find(query_list,grid,time_res):
253285
**kwargs)
254286
elif mode == 's3_open':
255287
granule_files[shortname] = ecco_podaac_s3_open(\
256-
shortname,StartDate,EndDate,snapshot_interval)
288+
shortname,StartDate,EndDate,version,snapshot_interval)
257289
elif mode == 's3_open_fsspec':
258290
# granule_files will consist of mapper objects rather than URL/path or file lists
259291
granule_files[shortname] = ecco_podaac_s3_open_fsspec(\
260292
shortname,**kwargs)
261293
elif mode == 's3_get':
262294
kwargs['return_downloaded_files'] = True
263295
granule_files[shortname] = ecco_podaac_s3_get(\
264-
shortname,StartDate,EndDate,snapshot_interval,\
296+
shortname,StartDate,EndDate,version,snapshot_interval,\
265297
download_root_dir=download_root_dir,\
266298
**kwargs)
267299
else:
@@ -314,7 +346,11 @@ def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\
314346
will query the native grid monthly SSH datasets, and all native grid
315347
monthly datasets with variables or descriptions matching 'THETA'.
316348
317-
version: ('v4r4'), specifies ECCO version to query
349+
version: ('v4r4','v4r5'), specifies ECCO version to query.
350+
Currently 'v4r5' only works with ['s3_open','s3_get','s3_get_ifspace'] modes,
351+
or if the files are already stored in download_root_dir/ShortName/.
352+
Otherwise an error is raised.
353+
'v4r5' only has grid='native' and time_res='monthly' data files available.
318354
319355
grid: ('native','latlon',None), specifies whether to query datasets with output
320356
on the native grid or the interpolated lat/lon grid.

0 commit comments

Comments
 (0)