Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 71 additions & 20 deletions argopy/stores/index/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,11 @@ def lon(self):
----------
BOX : list, tuple, int, float, optional
An index box to search Argo records for. Can be:

- Full 6-element list: [lon_min, lon_max, lat_min, lat_max, date_min, date_max]
- 2-element list: [lon_min, lon_max]
- Single value: interpreted as lower bound (ge)

ge : int or float, optional
Greater or equal bound for longitude filtering (lower limit).
Default: -180
Expand Down Expand Up @@ -249,11 +249,11 @@ def lat(self):
----------
BOX : list, tuple, int, float, optional
An index box to search Argo records for. Can be:

- Full 6-element list: [lon_min, lon_max, lat_min, lat_max, date_min, date_max]
- 2-element list: [lat_min, lat_max]
- Single value: interpreted as lower bound (ge)

ge : int or float, optional
Greater or equal bound for latitude filtering (lower limit).
Default: -90
Expand Down Expand Up @@ -296,11 +296,11 @@ def date(self):
----------
BOX : list or str, optional
An index box to search Argo records for. Can be:

- Full 6-element list: [lon_min, lon_max, lat_min, lat_max, date_min, date_max]
- 2-element list: [date_min, date_max]
- Single date string: interpreted as day-only (profiles on that specific date)

ge : str, optional
Greater or equal bound for date filtering (lower limit).
Default: '1900-01-01'
Expand Down Expand Up @@ -456,7 +456,7 @@ def parameter_data_mode(self):
def profiler_type(self):
"""Search index for profiler types

The list of valid types is given by IDs of `Argo reference table 8 <http://vocab.nerc.ac.uk/collection/R08/current/>`_.
The list of valid types is given in `Argo reference table 8 / ARGO_WMO_INST_TYPE <http://vocab.nerc.ac.uk/collection/R08/current/>`_.

Parameters
----------
Expand All @@ -479,8 +479,8 @@ def profiler_type(self):
.. code-block:: python
:caption: List valid types

from argopy import ArgoNVSReferenceTables
valid_types = ArgoNVSReferenceTables().tbl(8)['altLabel']
from argopy import ArgoReferenceTable
valid_types : list[str] = ArgoReferenceTable('ARGO_WMO_INST_TYPE').keys()

See Also
--------
Expand All @@ -491,12 +491,12 @@ def profiler_type(self):
def profiler_label(self, profiler_label: str, nrows=None, composed=False):
"""Search index for profiler types with a given string in their long name

Will search for string occurrences in the preferred label of `Argo reference table 8 <http://vocab.nerc.ac.uk/collection/R08/current/>`_.
Will search for string occurrences in the preferred label of `Argo reference table 8/ARGO_WMO_INST_TYPE <http://vocab.nerc.ac.uk/collection/R08/current/>`_.

Parameters
----------
profiler_label: str, list(str)
The string (not exact) to be found in profiler preferred labels.
The string (not necessarily exact) to be found in profiler preferred labels.

Returns
-------
Expand All @@ -514,8 +514,9 @@ def profiler_label(self, profiler_label: str, nrows=None, composed=False):
.. code-block:: python
:caption: List valid labels

from argopy import ArgoNVSReferenceTables
valid_labels = ArgoNVSReferenceTables().tbl(8)['prefLabel']
from argopy import ArgoReferenceTable
df = ArgoReferenceTable('ARGO_WMO_INST_TYPE').to_dataframe()
valid_labels : list[str] = list(df['long_name'].to_dict().values())

See Also
--------
Expand Down Expand Up @@ -552,11 +553,42 @@ def composer(profiler_type):
self._obj.search_type.update(namer(profiler_label))
return search_filter

@abstractmethod
def profile_qc(self, PARAMs: dict, logical="and", nrows=None, composed=False):
    """Search index for parameter profile QCs with a specific value

    This is the abstract entry point: it only raises. The search itself is provided by the
    overriding method of a concrete search engine.

    Parameters
    ----------
    PARAMs: dict
        A dictionary with parameters as keys, and profile QC as a string or a list of strings
    logical: str, default='and'
        Indicate to search for all (``and``) or any (``or``) of the parameters profile QC. This operator applies
        between each parameter.
    nrows: int, optional
        Passed on to the index when the search is run (presumably a cap on the number of rows, to be
        confirmed against the index store).
    composed: bool, default=False
        If ``True``, the search is not run and the search filter is returned instead, so that it can be
        combined with other filters.

    Returns
    -------
    :class:`ArgoIndex`

    Raises
    ------
    NotImplementedError
        Always, in this base version.

    Examples
    --------
    .. code-block:: python

        from argopy import ArgoIndex
        idx = ArgoIndex(index_file='core+')

        idx.query.profile_qc({'TEMP': 'A'})
        idx.query.profile_qc({'PSAL': 'A'})
        idx.query.profile_qc({'DOXY': ['A', 'B']})
        idx.query.profile_qc({'PSAL': 'A', 'DOXY': 'A'}, logical='or')

    """
    raise NotImplementedError("Not implemented")

@abstractmethod
def institution_code(self, institution_code, nrows=None, composed=False):
"""Search index for institution codes

The list of valid codes is given by IDs of `Argo reference table 4 <http://vocab.nerc.ac.uk/collection/R04/current/>`_.
The list of valid codes is given in `Argo reference table 4/DATA_CENTRE_CODES <http://vocab.nerc.ac.uk/collection/R04/current/>`_.

Parameters
----------
Expand All @@ -580,8 +612,8 @@ def institution_code(self, institution_code, nrows=None, composed=False):
.. code-block:: python
:caption: List valid codes

from argopy import ArgoNVSReferenceTables
valid_codes = ArgoNVSReferenceTables().tbl(4)['altLabel']
from argopy import ArgoReferenceTable
valid_codes : list[str] = ArgoReferenceTable('DATA_CENTRE_CODES').keys()

See Also
--------
Expand All @@ -593,12 +625,12 @@ def institution_code(self, institution_code, nrows=None, composed=False):
def institution_name(self, institution_name: str, nrows=None, composed=False):
"""Search index for institutions with a given string in their long name

Will search for string occurrences in the preferred label of `Argo reference table 4 <http://vocab.nerc.ac.uk/collection/R04/current/>`_.
Will search for string occurrences in the preferred label of `Argo reference table 4/DATA_CENTRE_CODES <http://vocab.nerc.ac.uk/collection/R04/current/>`_.

Parameters
----------
institution_name: str, list(str)
The string (not exact) to be found in institution preferred labels.
The string (not necessarily exact) to be found in institution preferred labels.

Returns
-------
Expand All @@ -617,8 +649,9 @@ def institution_name(self, institution_name: str, nrows=None, composed=False):
.. code-block:: python
:caption: List valid names

from argopy import ArgoNVSReferenceTables
valid_names = ArgoNVSReferenceTables().tbl(4)['prefLabel']
from argopy import ArgoReferenceTable
df = ArgoReferenceTable('DATA_CENTRE_CODES').to_dataframe()
valid_names : list[str] = list(df['long_name'].to_dict().values())

See Also
--------
Expand Down Expand Up @@ -688,6 +721,24 @@ def dac(self, dac, nrows=None, composed=False):
"""
raise NotImplementedError("Not implemented")

@abstractmethod
def psal_adj(self):
    """Search (detailed) index for salinity adjustment values

    Defined for delayed mode or adjusted mode profiles only.

    - Mean of psal_adjusted – psal on the deepest 500 meters with good psal_adjusted_qc (equal to 1)
    - Standard deviation of psal_adjusted – psal on the deepest 500 meters with good psal_adjusted_qc (equal to 1)

    This is the abstract entry point: it only raises. The search itself is provided by the
    overriding method of a concrete search engine.

    Parameters
    ----------
    where : {'mean', 'dev'}, default='mean'
        Select the adjustment statistic to search on: the mean or the standard deviation.
    ge : float, optional
        Greater or equal bound for the selected statistic (lower limit).
        Default: 0.0
    le : float, optional
        Lower or equal bound for the selected statistic (upper limit).
        Default: None (no upper limit)

    Raises
    ------
    NotImplementedError
        Always, in this base version.

    """
    # NOTE(review): the parameters documented above are those of the pandas search engine
    # implementation; this stub itself only takes ``self`` - confirm other engines agree.
    raise NotImplementedError("Not implemented")

@abstractmethod
def n_levels(self):
    """Search index profiles using the maximum number of pressure levels contained in a profile

    This is the abstract entry point: it only raises. The search itself must be provided by an
    overriding method.

    Raises
    ------
    NotImplementedError
        Always, in this base version.
    """
    raise NotImplementedError("Not implemented")

def compose(self, query: dict, nrows=None):
"""Compose query with multiple search methods

Expand Down
101 changes: 98 additions & 3 deletions argopy/stores/index/implementations/pandas/search_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import pandas as pd
import numpy as np
from typing import List
from typing import List, Literal, Optional
from functools import lru_cache

from argopy.options import OPTIONS
Expand Down Expand Up @@ -48,7 +48,7 @@ def compute_params(param: str, obj):
class SearchEngine(ArgoIndexSearchEngine):

@search_s3
def wmo(self, WMOs, nrows=None, composed=False) -> indexstore:
def wmo(self, WMOs, nrows=None, composed=False):
def checker(WMOs):
WMOs = check_wmo(WMOs) # Check and return a valid list of WMOs
log.debug(
Expand Down Expand Up @@ -79,7 +79,7 @@ def composer(obj, WMOs):
return search_filter

@search_s3
def cyc(self, CYCs, nrows=None, composed=False) -> indexstore:
def cyc(self, CYCs, nrows=None, composed=False):
def checker(CYCs):
if self._obj.convention in ["ar_index_global_meta"]:
raise InvalidDatasetStructure(
Expand Down Expand Up @@ -564,3 +564,98 @@ def composer(DACs):
else:
self._obj.search_type.update(namer(dac))
return search_filter

def profile_qc(self, PARAMs: dict, logical="and", nrows=None, composed=False):
    """Search index for parameter profile QCs with a specific value

    Parameters
    ----------
    PARAMs: dict
        A dictionary with parameters as keys, and profile QC as a string or a list of strings.
        The dictionary is not modified.
    logical: str, default='and'
        Operator handed to the index filter reduction to combine the per-parameter filters.
    nrows: int, optional
        Passed on to the index ``run`` method when the search is executed.
    composed: bool, default=False
        If ``True``, the search is not run: the search type is registered and the filter is returned.

    Returns
    -------
    The index object once the search has run (``composed=False``), or the search filter
    (``composed=True``).

    Raises
    ------
    InvalidDatasetStructure
        If the index convention has no ``profile_temp_qc`` column.
    ValueError
        If one of the requested profile QC flags is not an accepted value.
    """
    # Every flag accepted by the validation; the error message is built from this same
    # list so that both always agree.
    valid_flags = ["", " ", "1", "A", "B", "C", "D", "E", "F"]

    def checker(PARAMs):
        if "profile_temp_qc" not in self._obj.convention_columns:
            raise InvalidDatasetStructure("Cannot search for profile QC in this index")
        # Normalise each value into a list of flags (to_list is expected to wrap a single
        # string). A new dictionary is built so that the caller's one is left untouched.
        PARAMs = {param: to_list(flags) for param, flags in PARAMs.items()}
        invalid = [
            flag
            for flags in PARAMs.values()
            for flag in flags
            if flag not in valid_flags
        ]
        if invalid:
            raise ValueError(
                "Profile QC must be a value in %s (got: %s)"
                % (
                    ", ".join(repr(flag) for flag in valid_flags),
                    ", ".join(repr(flag) for flag in invalid),
                )
            )
        log.debug("Argo index searching for profile QC: %s ...", PARAMs)
        return PARAMs

    def namer(PARAMs, logical):
        return {"PROFQC": (PARAMs, logical)}

    def composer(PARAMs, logical):
        # One boolean filter per parameter, selecting rows whose profile QC is one of the flags
        filt = [
            self._obj.index[f"profile_{param.lower()}_qc"].isin(qcflags)
            for param, qcflags in PARAMs.items()
        ]
        return self._obj._reduce_a_filter_list(filt, op=logical)

    PARAMs = checker(PARAMs)
    self._obj.load(nrows=self._obj._nrows_index)
    search_filter = composer(PARAMs, logical)
    if not composed:
        self._obj.search_type = namer(PARAMs, logical)
        self._obj.search_filter = search_filter
        self._obj.run(nrows=nrows)
        return self._obj
    else:
        self._obj.search_type.update(namer(PARAMs, logical))
        return search_filter

def psal_adj(
    self,
    where: Literal["mean", "dev"] = "mean",
    ge: Optional[float] = 0.0,
    le: Optional[float] = None,
    nrows=None,
    composed=False,
):
    """Search (detailed) index for salinity adjustment values

    Parameters
    ----------
    where : {'mean', 'dev'}, default='mean'
        Column to search on: ``ad_psal_adjustment_mean`` or ``ad_psal_adjustment_deviation``.
        Case insensitive.
    ge : float, optional
        Greater or equal bound (lower limit). ``None`` disables it. Default: 0.0
    le : float, optional
        Lower or equal bound (upper limit). ``None`` disables it. Default: None
    nrows: int, optional
        Passed on to the index ``run`` method when the search is executed.
    composed: bool, default=False
        If ``True``, the search is not run: the search type is registered and the filter is returned.

    Returns
    -------
    The index object once the search has run (``composed=False``), or the search filter
    (``composed=True``).

    Raises
    ------
    ValueError
        If ``where`` is not 'mean' or 'dev', or if a negative bound is given for the deviation.
    InvalidDatasetStructure
        If the index convention has no salinity adjustment columns.
    """

    def checker(where: str, ge: Optional[float], le: Optional[float]) -> list:
        # Arguments are validated first, before looking at the index structure
        stat = where.lower()
        if stat not in ("mean", "dev"):
            raise ValueError(f"'{where}': The 'where' argument must be 'mean' or 'dev'.")
        if stat == "dev":
            # A standard deviation is never negative
            if ge is not None and ge < 0:
                raise ValueError("Deviation lower limit must be zero or positive")
            if le is not None and le < 0:
                raise ValueError("Deviation upper limit must be zero or positive")

        if "ad_psal_adjustment_mean" not in self._obj.convention_columns:
            raise InvalidDatasetStructure(
                "Cannot search for salinity adjustment mean in this index"
            )
        if "ad_psal_adjustment_deviation" not in self._obj.convention_columns:
            raise InvalidDatasetStructure(
                "Cannot search for salinity adjustment deviation in this index"
            )

        return [stat, ge, le]

    def namer(bounds):
        return {f"PSAL_ADJ_{bounds[0].upper()}": bounds[1:]}

    def composer(obj, bounds):
        stat, lower, upper = bounds
        pname: str = (
            "ad_psal_adjustment_mean"
            if stat == "mean"
            else "ad_psal_adjustment_deviation"
        )
        filt = []
        if lower is not None:
            filt.append(obj.index[pname].ge(lower))
        if upper is not None:
            filt.append(obj.index[pname].le(upper))

        # NOTE(review): with ge=None and le=None the filter list is empty; confirm that
        # _reduce_a_filter_list handles this case.
        return obj._reduce_a_filter_list(filt, op="and")

    bounds = checker(where, ge, le)
    self._obj.load(nrows=self._obj._nrows_index)
    search_filter = composer(self._obj, bounds)
    if not composed:
        self._obj.search_type = namer(bounds)
        self._obj.search_filter = search_filter
        self._obj.run(nrows=nrows)
        return self._obj
    else:
        self._obj.search_type.update(namer(bounds))
        return search_filter
2 changes: 1 addition & 1 deletion argopy/stores/index/implementations/pyarrow/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ def convert_a_date(row):
]:
s = s.set_column(1, "date", new_date)

if self.convention == "ar_index_global_prof":
if self.convention in ["ar_index_global_prof", "argo_profile_detailled_index"]:
s = s.set_column(7, "date_update", new_date_update)
elif self.convention in [
"argo_bio-profile_index",
Expand Down
Loading
Loading