Skip to content

Commit 50000b1

Browse files
authored
Merge pull request #104 from andrewdelman/ecco_access_202504
Adding v4r5 monthly mean variable list
2 parents d7726de + 7309ae0 commit 50000b1

12 files changed

+1042
-37
lines changed

ecco_access/ecco_access.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ def ecco_podaac_access(query,version='v4r4',grid=None,time_res='all',\
9898
9999
download_root_dir: str, defines parent directory to download files to.
100100
Files will be downloaded to directory download_root_dir/ShortName/.
101-
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'.
101+
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/',
102+
or '~/Downloads/ECCO_V4r5_PODAAC/' if version == 'v4r5'.
102103
103104
Additional keyword arguments*:
104105
*This is not an exhaustive list, especially for
@@ -181,12 +182,12 @@ def shortnames_find(query_list,version,grid,time_res):
181182
for query_item in query_list:
182183
if version == 'v4r5':
183184
# see if the query is an existing dataset ID
184-
if query_item not in s3_datasets_list:
185-
raise ValueError("'"+query_item+"' is not a v4r5 dataset ID.\n"\
186-
+"Please query using the following dataset IDs:\n"\
187-
+str(s3_datasets_list))
188-
else:
185+
# if not, then do a text search of the ECCO variable lists
186+
if query_item in s3_datasets_list:
189187
shortnames_list.append(query_item)
188+
else:
189+
shortname_match = ecco_podaac_varlist_query(query_item,version,grid,time_res)
190+
shortnames_list.append(shortname_match)
190191
else:
191192
# see if the query is an existing NASA Earthdata ShortName
192193
# if not, then do a text search of the ECCO variable lists
@@ -238,10 +239,15 @@ def shortnames_find(query_list,version,grid,time_res):
238239
for kwarg in list(kwargs.keys()):
239240
if kwarg != 'jsons_root_dir':
240241
del kwargs[kwarg]
242+
elif mode == 's3_open':
243+
for kwarg in list(kwargs.keys()):
244+
if kwarg in ['n_workers','force_redownload','show_noredownload_msg']:
245+
del kwargs[kwarg]
241246
else:
242247
if 'jsons_root_dir' in kwargs.keys():
243248
del kwargs['jsons_root_dir']
244249

250+
245251
# download or otherwise access granules, depending on mode
246252

247253
if mode in ['download_ifspace','s3_get_ifspace']:
@@ -295,7 +301,8 @@ def shortnames_find(query_list,version,grid,time_res):
295301
**kwargs)
296302
elif mode == 's3_open':
297303
granule_files[shortname] = ecco_podaac_s3_open(\
298-
shortname,StartDate,EndDate,version,snapshot_interval)
304+
shortname,StartDate,EndDate,version,snapshot_interval,\
305+
**kwargs)
299306
elif mode == 's3_open_fsspec':
300307
# granule_files will consist of mapper objects rather than URL/path or file lists
301308
granule_files[shortname] = ecco_podaac_s3_open_fsspec(\
@@ -406,7 +413,8 @@ def ecco_podaac_to_xrdataset(query,version='v4r4',grid=None,time_res='all',\
406413
407414
download_root_dir: str, defines parent directory to download files to.
408415
Files will be downloaded to directory download_root_dir/ShortName/.
409-
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'.
416+
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/',
417+
or '~/Downloads/ECCO_V4r5_PODAAC/' if version == 'v4r5'.
410418
411419
Additional keyword arguments*:
412420
*This is not an exhaustive list, especially for

ecco_access/ecco_download.py

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,8 @@ def ecco_podaac_download(ShortName,StartDate,EndDate,version,snapshot_interval='
388388
389389
download_root_dir: str, defines parent directory to download files to.
390390
Files will be downloaded to directory download_root_dir/ShortName/.
391-
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'.
391+
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/',
392+
or '~/Downloads/ECCO_V4r5_PODAAC/' if version == 'v4r5'.
392393
393394
n_workers: int, number of workers to use in concurrent downloads. Benefits typically taper off above 5-6.
394395
@@ -417,16 +418,21 @@ def ecco_podaac_download(ShortName,StartDate,EndDate,version,snapshot_interval='
417418

418419
# set default download parent directory
419420
if download_root_dir==None:
420-
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
421+
if version == 'v4r4':
422+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
423+
elif version == 'v4r5':
424+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r5_PODAAC')
421425

422426
# define the directory where the downloaded files will be saved
423427
download_dir = Path(download_root_dir) / ShortName
424428

425-
# create the download directory
429+
# create the download directory if it does not already exist
430+
if isdir(download_dir) == True:
431+
print(f'Download to directory {download_dir}')
432+
else:
433+
print(f'Creating download directory {download_dir}')
426434
download_dir.mkdir(exist_ok = True, parents=True)
427435

428-
print(f'created download directory {download_dir}')
429-
430436
# query CMR for granules matching the request
431437
urls,sizes = ecco_podaac_query(ShortName,StartDate,EndDate,version,snapshot_interval)
432438

@@ -478,7 +484,8 @@ def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,version,snapshot
478484
479485
download_root_dir: str, defines parent directory to download files to.
480486
Files will be downloaded to directory download_root_dir/ShortName/.
481-
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'.
487+
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/',
488+
or '~/Downloads/ECCO_V4r5_PODAAC/' if version == 'v4r5'.
482489
483490
max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets.
484491
If storing the datasets exceeds this fraction, an error is returned.
@@ -522,7 +529,10 @@ def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,version,snapshot
522529

523530
# set default download parent directory
524531
if download_root_dir==None:
525-
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
532+
if version == 'v4r4':
533+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
534+
elif version == 'v4r5':
535+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r5_PODAAC')
526536

527537
# add up total size of files that would be downloaded
528538
dataset_sizes = np.array([])
@@ -532,8 +542,14 @@ def ecco_podaac_download_diskaware(ShortNames,StartDate,EndDate,version,snapshot
532542
# get list of files
533543
urls,sizes = ecco_podaac_query(curr_shortname,StartDate,EndDate,version,snapshot_interval)
534544

535-
# create the download directory if it does not already exist
545+
# define the directory where the downloaded files will be saved
536546
download_dir = Path(download_root_dir) / curr_shortname
547+
548+
# create the download directory if it does not already exist
549+
if isdir(download_dir) == True:
550+
print(f'Download to directory {download_dir}')
551+
else:
552+
print(f'Creating download directory {download_dir}')
537553
download_dir.mkdir(exist_ok = True, parents=True)
538554

539555
# compute size of current dataset
@@ -718,7 +734,8 @@ def ecco_podaac_download_subset(ShortName,StartDate=None,EndDate=None,snapshot_i
718734
719735
download_root_dir: str, defines parent directory to download files to.
720736
Files will be downloaded to directory download_root_dir/ShortName/.
721-
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'.
737+
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/',
738+
or '~/Downloads/ECCO_V4r5_PODAAC/' if version == 'v4r5'.
722739
subset_file_id: str, identifier appended to each downloaded file to identify it as a subset.
723740
Default is to not append an identifier.
724741
@@ -1079,7 +1096,10 @@ def download_wrapper(url: str, url_append: str, download_dir: str, subset_file_i
10791096

10801097
# set default download parent directory
10811098
if download_root_dir==None:
1082-
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
1099+
if version == 'v4r4':
1100+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
1101+
elif version == 'v4r5':
1102+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r5_PODAAC')
10831103

10841104
# define the directory where the downloaded files will be saved
10851105
download_dir = Path(download_root_dir) / ShortName

ecco_access/ecco_s3_retrieve.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,9 @@ def get_granules_ecco_bucket(StartDate: str, EndDate: str,\
149149
s3 = s3fs.S3FileSystem(anon=False,\
150150
requester_pays=True)
151151
if version == 'v4r5':
152+
shortname_dir = "_".join(ShortName.split("_")[2:-3])
152153
s3_files_all = s3.ls("s3://ecco-model-granules/netcdf/V4r5/native/mon_mean/"\
153-
+ShortName+"/")
154+
+shortname_dir+"/")
154155

155156
# include only the granules in the date range given by temporal_range
156157
s3_files_all_dates = np.array([np.datetime64(s3_file.split("_")[-5],'M')\
@@ -552,7 +553,8 @@ def ecco_podaac_s3_get(ShortName,StartDate,EndDate,version,snapshot_interval='mo
552553
553554
download_root_dir: str, defines parent directory to download files to.
554555
Files will be downloaded to directory download_root_dir/ShortName/.
555-
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'.
556+
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/',
557+
or '~/Downloads/ECCO_V4r5_PODAAC/' if version == 'v4r5'.
556558
557559
n_workers: int, number of workers to use in concurrent downloads. Benefits typically taper off above 5-6.
558560
@@ -588,12 +590,19 @@ def ecco_podaac_s3_get(ShortName,StartDate,EndDate,version,snapshot_interval='mo
588590

589591
# set default download parent directory
590592
if download_root_dir==None:
591-
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
593+
if version == 'v4r4':
594+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
595+
elif version == 'v4r5':
596+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r5_PODAAC')
592597

593598
# define the directory where the downloaded files will be saved
594599
download_dir = Path(download_root_dir) / ShortName
595600

596-
# create the download directory
601+
# create the download directory if it does not already exist
602+
if isdir(download_dir) == True:
603+
print(f'Download to directory {download_dir}')
604+
else:
605+
print(f'Creating download directory {download_dir}')
597606
download_dir.mkdir(exist_ok = True, parents=True)
598607

599608
# get list of files
@@ -664,7 +673,8 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,version,snapshot_i
664673
665674
download_root_dir: str, defines parent directory to download files to.
666675
Files will be downloaded to directory download_root_dir/ShortName/.
667-
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/'.
676+
If not specified, parent directory defaults to '~/Downloads/ECCO_V4r4_PODAAC/',
677+
or '~/Downloads/ECCO_V4r5_PODAAC/' if version == 'v4r5'.
668678
669679
max_avail_frac: float, maximum fraction of remaining available disk space to use in storing current ECCO datasets.
670680
This determines whether the dataset files are stored on the current instance, or opened on S3.
@@ -717,7 +727,10 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,version,snapshot_i
717727

718728
# set default download parent directory
719729
if download_root_dir==None:
720-
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
730+
if version == 'v4r4':
731+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
732+
elif version == 'v4r5':
733+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r5_PODAAC')
721734

722735
# add up total size of files that would be downloaded
723736
dataset_sizes = np.array([])
@@ -729,6 +742,10 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,version,snapshot_i
729742

730743
# define the download directory, then create it if it does not already exist
731744
download_dir = Path(download_root_dir) / curr_shortname
745+
if isdir(download_dir) == True:
746+
print(f'Download to directory {download_dir}')
747+
else:
748+
print(f'Creating download directory {download_dir}')
732749
download_dir.mkdir(exist_ok = True, parents=True)
733750

734751
# compute size of current dataset
@@ -777,7 +794,10 @@ def ecco_podaac_s3_get_diskaware(ShortNames,StartDate,EndDate,version,snapshot_i
777794
for curr_shortname,s3_files_list in zip(ShortNames,s3_files_list_all):
778795
# set default download parent directory
779796
if download_root_dir==None:
780-
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
797+
if version == 'v4r4':
798+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r4_PODAAC')
799+
elif version == 'v4r5':
800+
download_root_dir = join(expanduser('~'),'Downloads','ECCO_V4r5_PODAAC')
781801

782802
# define the directory where the downloaded files will be saved
783803
download_dir = Path(download_root_dir) / curr_shortname

ecco_access/ecco_varlist.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def print_varlist_query_results(query,shortnames_match,\
108108
for count,shortname in enumerate(shortnames_match):
109109
shortname_line = 'Option '+str(count+1)+': '+shortname
110110
if (('Note' in content_dict[shortname].keys())\
111-
and ('*' in content_dict[shortname]['Note'])):
111+
and (' *' in content_dict[shortname]['Note'])):
112112
shortname_line += (' '+content_dict[shortname]['Note'])
113113
else:
114114
shortname_line += ' *'
@@ -126,6 +126,8 @@ def print_varlist_query_results(query,shortnames_match,\
126126
query_resp_str += shortname_line
127127
for varname,descrip in content_dict[shortname].items():
128128
max_descrip_len_perline = 50
129+
if varname == 'Note':
130+
max_descrip_len_perline += descrip_pos - 6
129131
if len(descrip) > max_descrip_len_perline:
130132
# put line breaks in long descriptions
131133
descrip_withbreaks = ''
@@ -139,10 +141,13 @@ def print_varlist_query_results(query,shortnames_match,\
139141
descrip_withbreaks += curr_descrip
140142
descrip_remaining = descrip_remaining[(len(curr_descrip)+1):]
141143
if len(descrip_remaining) > 0:
142-
descrip_withbreaks += ('\n'+(' '*descrip_pos))
144+
if varname == 'Note':
145+
descrip_withbreaks += '\n '
146+
else:
147+
descrip_withbreaks += ('\n'+(' '*descrip_pos))
143148
descrip = descrip_withbreaks
144149
if varname == 'Note':
145-
if '*' in descrip:
150+
if ' *' in descrip:
146151
continue
147152
else:
148153
var_line = 'Note: '+descrip+'\n'
@@ -176,7 +181,7 @@ def ecco_podaac_varlist_query(query,version,grid=None,time_res='all'):
176181
query: str, a text string being used to query ShortNames, variable names,
177182
and descriptions.
178183
179-
version: ('v4r4'), ECCO version to search variable lists for
184+
version: ('v4r4','v4r5'), ECCO version to search variable lists for
180185
181186
grid: ('native','latlon',None), specifies whether to query datasets with output
182187
on the native grid or the interpolated lat/lon grid.
@@ -194,20 +199,25 @@ def ecco_podaac_varlist_query(query,version,grid=None,time_res='all'):
194199
195200
"""
196201

202+
197203
pass
198204

199205

200-
if version != 'v4r4':
206+
if version not in ['v4r4','v4r5']:
201207
raise ValueError('ECCO '+version+' is not currently available from PO.DAAC')
202208

203209
# paths to variable list files
204-
varlist_url_root = 'https://raw.githubusercontent.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/master/varlist/'
205-
varlist_url_ids = {'native,monthly':'v4r4_nctiles_monthly_varlist.txt',\
206-
'native,daily':'v4r4_nctiles_daily_varlist.txt',\
207-
'native,snapshot':'v4r4_nctiles_snapshots_varlist.txt',\
208-
'latlon,monthly':'v4r4_latlon_monthly_varlist.txt',\
209-
'latlon,daily':'v4r4_latlon_daily_varlist.txt',\
210-
'mixed,all':'v4r4_tseries_grid_varlist.txt'}
210+
varlist_url_root = 'https://raw.githubusercontent.com/ECCO-GROUP/ECCO-v4-Python-Tutorial/master/ecco_access/varlist/'
211+
if version == 'v4r4':
212+
varlist_url_ids = {'native,monthly':'v4r4_nctiles_monthly_varlist.txt',\
213+
'native,daily':'v4r4_nctiles_daily_varlist.txt',\
214+
'native,snapshot':'v4r4_nctiles_snapshots_varlist.txt',\
215+
'latlon,monthly':'v4r4_latlon_monthly_varlist.txt',\
216+
'latlon,daily':'v4r4_latlon_daily_varlist.txt',\
217+
'mixed,all':'v4r4_tseries_grid_varlist.txt'}
218+
elif version == 'v4r5':
219+
varlist_url_ids = {'native,monthly':'v4r5_nctiles_monthly_varlist.txt'}
220+
211221

212222
# set keys of grid types and time resolutions to search
213223

@@ -245,6 +255,8 @@ def ecco_podaac_varlist_query(query,version,grid=None,time_res='all'):
245255
# build content dictionary containing all varlists being queried
246256
content_dict = {}
247257
for curr_key in grid_timeres_keys:
258+
if curr_key not in varlist_url_ids.keys():
259+
continue
248260
curr_content_dict,varname_pos,descrip_pos = varlist_file_parse(\
249261
varlist_url_root+varlist_url_ids[curr_key])
250262
content_dict = {**content_dict,**curr_content_dict}

0 commit comments

Comments
 (0)