ECCO-GROUP
diff --git a/‎Tutorials_as_Jupyter_Notebooks/ECCO_v4_Accessing_and_Subsetting_Variables.ipynb‎
Lines changed: 32 additions & 26 deletions b/‎Tutorials_as_Jupyter_Notebooks/ECCO_v4_Accessing_and_Subsetting_Variables.ipynb‎
Lines changed: 32 additions & 26 deletions
diff --git a/‎Tutorials_as_Jupyter_Notebooks/ECCO_v4_Combining_Multiple_Datasets.ipynb‎
Lines changed: 38 additions & 35 deletions b/‎Tutorials_as_Jupyter_Notebooks/ECCO_v4_Combining_Multiple_Datasets.ipynb‎
Lines changed: 38 additions & 35 deletions
@@ -20,12 +20,12 @@
     "\n",
     "Both methods work identically to access *Dimensions*, *Coordinates*, and *Data variables*. Accessing *Attribute* variables requires a slightly different approach as we will see.\n",
     "\n",
-    "For this tutorial make sure that you have the 2010 monthly granules of the SSH and OBP datasets downloaded, with ShortNames:\n",
+    "For this tutorial you will need the 2010 monthly granules of the SSH and OBP datasets, with ShortNames\n",
     "\n",
     "- **ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4**\n",
     "- **ECCO_L4_OBP_LLC0090GRID_MONTHLY_V4R4**\n",
     "\n",
-    "You will also need the [grid file](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_v4_Loading_the_ECCOv4_native_model_grid_parameters.html) downloaded if you don't have it already.\n",
+    "as well as the [grid file](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_v4_Loading_the_ECCOv4_native_model_grid_parameters.html). The `ecco_access` library used in the notebook will handle download or retrieval of the necessary data, if you have set up the library [in your Python path](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_access_intro.html#Setting-up-ecco_access).\n",
     "\n",
     "\n",
     "## Accessing fields inside `Dataset` and `DataArray` objects\n",
@@ -47,12 +47,23 @@
     "import json\n",
     "import glob\n",
     "\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
     "\n",
-    "# indicate whether you are working in a cloud instance (True if yes, False otherwise)\n",
-    "incloud_access = False\n",
+    "import ecco_access as ea\n",
     "\n",
-    "import warnings\n",
-    "warnings.filterwarnings('ignore')"
+    "# indicate mode of access\n",
+    "# options are:\n",
+    "# 'download': direct download from internet to your local machine\n",
+    "# 'download_ifspace': like download, but only proceeds \n",
+    "#                     if your machine has sufficient storage\n",
+    "# 's3_open': access datasets in-cloud from an AWS instance\n",
+    "# 's3_open_fsspec': use jsons generated with fsspec and \n",
+    "#                   kerchunk libraries to speed up in-cloud access\n",
+    "# 's3_get': direct download from S3 in-cloud to an AWS instance\n",
+    "# 's3_get_ifspace': like s3_get, but only proceeds if your instance \n",
+    "#                   has sufficient storage\n",
+    "access_mode = 'download_ifspace'"
    ]
   },
   {
@@ -85,9 +96,12 @@
     "## Set top-level file directory for the ECCO NetCDF files\n",
     "## =================================================================\n",
     "\n",
-    "## currently set to ~/Downloads/ECCO_V4r4_PODAAC, \n",
-    "## the default if ecco_podaac_download was used to download dataset granules\n",
-    "ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')"
+    "## currently set to ~/Downloads/ECCO_V4r4_PODAAC\n",
+    "ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')\n",
+    "\n",
+    "# # for access_mode = 's3_open_fsspec', need to specify the root directory \n",
+    "# # containing the jsons\n",
+    "# jsons_root_dir = join('/efs_ecco','mzz-jsons')"
    ]
   },
   {
@@ -101,24 +115,16 @@
     "ShortNames_list = [\"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\",\\\n",
     "                   \"ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4\",\\\n",
     "                   \"ECCO_L4_OBP_LLC0090GRID_MONTHLY_V4R4\"]\n",
-    "if incloud_access == True:\n",
-    "    from ecco_s3_retrieve import ecco_podaac_s3_get_diskaware\n",
-    "    files_dict = ecco_podaac_s3_get_diskaware(ShortNames=ShortNames_list,\\\n",
+    "\n",
+    "ds_dict = ea.ecco_podaac_to_xrdataset(ShortNames_list,\\\n",
     "                                              StartDate='2010-01',EndDate='2010-12',\\\n",
-    "                                              max_avail_frac=0.5,\\\n",
-    "                                              download_root_dir=ECCO_dir)\n",
-    "    ecco_grid = xr.open_dataset(files_dict[ShortNames_list[0]])\n",
-    "    ds_SSH = xr.open_mfdataset(files_dict[ShortNames_list[1]],\\\n",
-    "                                     compat='override',data_vars='minimal',coords='minimal')\n",
-    "    ds_OBP = xr.open_mfdataset(files_dict[ShortNames_list[2]],\\\n",
-    "                                      compat='override',data_vars='minimal',coords='minimal')\n",
-    "else:\n",
-    "    ecco_grid = xr.open_dataset(glob.glob(join(ECCO_dir,ShortNames_list[0],'*.nc'))[0])\n",
-    "    ds_SSH = xr.open_mfdataset(glob.glob(join(ECCO_dir,ShortNames_list[1],'*2010-*.nc')),\\\n",
-    "                                     compat='override',data_vars='minimal',coords='minimal')\n",
-    "    ds_OBP = xr.open_mfdataset(glob.glob(join(ECCO_dir,ShortNames_list[2],'*2010-*.nc')),\\\n",
-    "                                     compat='override',data_vars='minimal',coords='minimal')\n",
+    "                                              mode=access_mode,\\\n",
+    "                                              download_root_dir=ECCO_dir,\\\n",
+    "                                              max_avail_frac=0.5)\n",
     "\n",
+    "ecco_grid = ds_dict[ShortNames_list[0]]\n",
+    "ds_SSH = ds_dict[ShortNames_list[1]]\n",
+    "ds_OBP = ds_dict[ShortNames_list[2]]\n",
     "\n",
     "## Merge the grid and variable datasets to make the ecco_ds\n",
     "ecco_ds = xr.merge((ecco_grid,ds_SSH,ds_OBP))                                      "
@@ -3934,7 +3940,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,
 
@@ -14,14 +14,15 @@
     "\n",
     "In previous tutorials we've loaded single lat-lon-cap NetCDF tile files (granules) for ECCO state estimate variables and model grid parameters.  Here we will demonstrate merging `Datasets` together.  Some benefits of merging `Datasets` include having a tidier workspace and simplifying subsetting operations (e.g., using ``xarray.isel`` or ``xarray.sel`` as shown in the [previous tutorial](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_v4_Loading_the_ECCOv4_state_estimate_fields_on_the_native_model_grid.html)).  \n",
     "\n",
-    "First, we'll load three ECCOv4 NetCDF state estimate variables (each centered on different coordinates) as well as the model grid file. For this, you will need to download 2 datasets of monthly mean fields for the year 2010. The ShortNames for the 2 datasets are:\n",
+    "First, we'll load three ECCOv4 NetCDF state estimate variables (each centered on different coordinates) as well as the model grid file. For this, you will need 2 datasets of monthly mean fields for the year 2010, as well as the grid parameters file. The ShortNames for the datasets are:\n",
     "\n",
-    "- **ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4**\n",
-    "- **ECCO_L4_OCEAN_3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY_V4R4**\n",
+    "- **ECCO_L4_GEOMETRY_LLC0090GRID_V4R4** (no time dimension)\n",
+    "- **ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4** (Jan-Dec 2010)\n",
+    "- **ECCO_L4_OCEAN_3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY_V4R4** (Jan-Dec 2010)\n",
     "\n",
-    "If you did the previous tutorial you already have the SSH files.\n",
+    "The `ecco_access` library used in the notebook will handle download or retrieval of the necessary data, if you have set up the library [in your Python path](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_access_intro.html#Setting-up-ecco_access).\n",
     "\n",
-    "Once you have the required ECCOv4 output downloaded, let's define our environment."
+    "Let's define our environment:"
    ]
   },
   {
@@ -36,9 +37,20 @@
     "import matplotlib.pyplot as plt\n",
     "import json\n",
     "\n",
+    "import ecco_access as ea\n",
     "\n",
-    "# indicate whether you are working in a cloud instance (True if yes, False otherwise)\n",
-    "incloud_access = False"
+    "# indicate mode of access\n",
+    "# options are:\n",
+    "# 'download': direct download from internet to your local machine\n",
+    "# 'download_ifspace': like download, but only proceeds \n",
+    "#                     if your machine has sufficient storage\n",
+    "# 's3_open': access datasets in-cloud from an AWS instance\n",
+    "# 's3_open_fsspec': use jsons generated with fsspec and \n",
+    "#                   kerchunk libraries to speed up in-cloud access\n",
+    "# 's3_get': direct download from S3 in-cloud to an AWS instance\n",
+    "# 's3_get_ifspace': like s3_get, but only proceeds if your instance \n",
+    "#                   has sufficient storage\n",
+    "access_mode = 'download_ifspace'"
    ]
   },
   {
@@ -72,9 +84,12 @@
     "## Set top-level file directory for the ECCO NetCDF files\n",
     "## =================================================================\n",
     "\n",
-    "## currently set to ~/Downloads/ECCO_V4r4_PODAAC, \n",
-    "## the default if ecco_podaac_download was used to download dataset granules\n",
-    "ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')"
+    "## currently set to ~/Downloads/ECCO_V4r4_PODAAC\n",
+    "ECCO_dir = join(user_home_dir,'Downloads','ECCO_V4r4_PODAAC')\n",
+    "\n",
+    "# # for access_mode = 's3_open_fsspec', need to specify the root directory \n",
+    "# # containing the jsons\n",
+    "# jsons_root_dir = join('/efs_ecco','mzz-jsons')"
    ]
   },
   {
@@ -83,17 +98,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "## if working in the AWS cloud, access datasets needed for this tutorial\n",
+    "## Access datasets needed for this tutorial\n",
     "\n",
     "ShortNames_list = [\"ECCO_L4_GEOMETRY_LLC0090GRID_V4R4\",\\\n",
     "                   \"ECCO_L4_SSH_LLC0090GRID_MONTHLY_V4R4\",\\\n",
     "                   \"ECCO_L4_OCEAN_3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY_V4R4\"]\n",
-    "if incloud_access == True:\n",
-    "    from ecco_s3_retrieve import ecco_podaac_s3_get_diskaware\n",
-    "    files_dict = ecco_podaac_s3_get_diskaware(ShortNames=ShortNames_list,\\\n",
+    "\n",
+    "ds_dict = ea.ecco_podaac_to_xrdataset(ShortNames_list,\\\n",
     "                                              StartDate='2010-01',EndDate='2010-12',\\\n",
-    "                                              max_avail_frac=0.5,\\\n",
-    "                                              download_root_dir=ECCO_dir)"
+    "                                              mode=access_mode,\\\n",
+    "                                              download_root_dir=ECCO_dir,\\\n",
+    "                                              max_avail_frac=0.5)"
    ]
   },
   {
@@ -110,12 +125,7 @@
    "outputs": [],
    "source": [
     "# load dataset containing monthly SSH in 2010\n",
-    "if incloud_access == True:\n",
-    "    # use list comprehension to list file path(s)\n",
-    "    file_paths = [filepath for filepath in files_dict[ShortNames_list[1]] if '_2010-' in filepath]\n",
-    "    ecco_dataset_A = xr.open_mfdataset(file_paths)\n",
-    "else:\n",
-    "    ecco_dataset_A = xr.open_mfdataset(join(ECCO_dir,'*SSH*MONTHLY*','*_2010-??_*.nc'))"
+    "ecco_dataset_A = ds_dict[ShortNames_list[1]]"
    ]
   },
   {
@@ -224,12 +234,8 @@
     }
    ],
    "source": [
-    "# load dataset containing monthly mean 3D temperature fluxes in 2010\n",
-    "if incloud_access == True:\n",
-    "    file_paths = [filepath for filepath in files_dict[ShortNames_list[2]] if '_2010-' in filepath]\n",
-    "    ecco_dataset_B = xr.open_mfdataset(file_paths)\n",
-    "else:\n",
-    "    ecco_dataset_B = xr.open_mfdataset(join(ECCO_dir,'*3D_TEMPERATURE_FLUX_LLC0090GRID_MONTHLY*','*_2010-??_*.nc'))\n",
+    "# open dataset containing monthly mean 3D temperature fluxes in 2010\n",
+    "ecco_dataset_B = ds_dict[ShortNames_list[2]]\n",
     "\n",
     "ecco_dataset_B.data_vars"
    ]
@@ -1568,7 +1574,7 @@
    },
    "outputs": [],
    "source": [
-    "# merge together\n",
+    "# merge together and load into memory\n",
     "ecco_dataset_AB = xr.merge([ecco_dataset_A['SSH'], ecco_dataset_B[['ADVx_TH','ADVy_TH']]]).compute()"
    ]
   },
@@ -2499,11 +2505,8 @@
    ],
    "source": [
     "# Load the llc90 grid parameters\n",
-    "if incloud_access == True:\n",
-    "    grid_dataset = xr.open_dataset(files_dict[ShortNames_list[0]])\n",
-    "else:\n",
-    "    import glob\n",
-    "    grid_dataset = xr.open_dataset(glob.glob(join(ECCO_dir,'*GEOMETRY*','*.nc'))[0])\n",
+    "grid_dataset = ds_dict[ShortNames_list[0]].compute()\n",
+    "\n",
     "grid_dataset.coords"
    ]
   },
@@ -3382,7 +3385,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.8"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,