diff --git a/.github/ci-hpc-config.yml b/.github/ci-hpc-config.yml
new file mode 100644
index 0000000..0cb5ecd
--- /dev/null
+++ b/.github/ci-hpc-config.yml
@@ -0,0 +1,6 @@
+build:
+  python: 3.11
+  parallel: 1
+  pytest_cmd: |
+    python -m pytest --cov=./ --cov-report=xml
+    python -m coverage report
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
deleted file mode 100644
index 428dd22..0000000
--- a/.github/workflows/build_wheels.yml
+++ /dev/null
@@ -1,188 +0,0 @@
-# This file is autogenerated by maturin v1.7.7
-# To update, run
-#
-#    maturin generate-ci github
-#
-name: Build Python Wheels and push to PyPI
-
-on:
-  release:
-    types: [published]
-
-permissions:
-  contents: read
-
-jobs:
-  linux:
-    runs-on: ${{ matrix.platform.runner }}
-    strategy:
-      matrix:
-        platform:
-          - runner: ubuntu-22.04
-            target: x86_64
-          - runner: ubuntu-22.04
-            target: x86
-          - runner: ubuntu-22.04
-            target: aarch64
-          - runner: ubuntu-22.04
-            target: armv7
-          - runner: ubuntu-22.04
-            target: s390x
-          - runner: ubuntu-22.04
-            target: ppc64le
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: 3.x
-      - name: Set cargo version from tag
-        run: python .github/workflows/update_version.py
-
-      - name: Build wheels
-        uses: PyO3/maturin-action@v1
-        with:
-          target: ${{ matrix.platform.target }}
-          args: --release --out dist --find-interpreter
-          sccache: 'true'
-          manylinux: auto
-      - name: Upload wheels
-        uses: actions/upload-artifact@v4
-        with:
-          name: wheels-linux-${{ matrix.platform.target }}
-          path: dist
-
-  musllinux:
-    runs-on: ${{ matrix.platform.runner }}
-    strategy:
-      matrix:
-        platform:
-          - runner: ubuntu-22.04
-            target: x86_64
-          - runner: ubuntu-22.04
-            target: x86
-          - runner: ubuntu-22.04
-            target: aarch64
-          - runner: ubuntu-22.04
-            target: armv7
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: 3.x
-      - name: Set cargo version from tag
-        run: python .github/workflows/update_version.py
-
-      - name: Build wheels
-        uses: PyO3/maturin-action@v1
-        with:
-          target: ${{ matrix.platform.target }}
-          args: --release --out dist --find-interpreter
-          sccache: 'true'
-          manylinux: musllinux_1_2
-      - name: Upload wheels
-        uses: actions/upload-artifact@v4
-        with:
-          name: wheels-musllinux-${{ matrix.platform.target }}
-          path: dist
-
-  windows:
-    runs-on: ${{ matrix.platform.runner }}
-    strategy:
-      matrix:
-        platform:
-          - runner: windows-latest
-            target: x64
-          - runner: windows-latest
-            target: x86
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: 3.x
-          architecture: ${{ matrix.platform.target }}
-      - name: Set cargo version from tag
-        run: python .github/workflows/update_version.py
-
-      - name: Build wheels
-        uses: PyO3/maturin-action@v1
-        with:
-          target: ${{ matrix.platform.target }}
-          args: --release --out dist --find-interpreter
-          sccache: 'true'
-      - name: Upload wheels
-        uses: actions/upload-artifact@v4
-        with:
-          name: wheels-windows-${{ matrix.platform.target }}
-          path: dist
-
-  macos:
-    runs-on: ${{ matrix.platform.runner }}
-    strategy:
-      matrix:
-        platform:
-          - runner: macos-13
-            target: x86_64
-          - runner: macos-14
-            target: aarch64
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/setup-python@v5
-        with:
-          python-version: 3.x
-      - name: Set cargo version from tag
-        run: python .github/workflows/update_version.py
-      - name: Build wheels
-        uses: PyO3/maturin-action@v1
-        with:
-          target: ${{ matrix.platform.target }}
-          args: --release --out dist --find-interpreter
-          sccache: 'true'
-      - name: Upload wheels
-        uses: actions/upload-artifact@v4
-        with:
-          name: wheels-macos-${{ matrix.platform.target }}
-          path: dist
-
-  sdist:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Set cargo version from tag
-        run: python .github/workflows/update_version.py
-      - name: Build sdist
-        uses: PyO3/maturin-action@v1
-        with:
-          command: sdist
-          args: --out dist
-      - name: Upload sdist
-        uses: actions/upload-artifact@v4
-        with:
-          name: wheels-sdist
-          path: dist
-
-  release:
-    name: Release
-    runs-on: ubuntu-latest
-    if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
-    needs: [linux, musllinux, windows, macos, sdist]
-    permissions:
-      # Use to sign the release artifacts
-      id-token: write
-      # Used to upload release artifacts
-      contents: write
-      # Used to generate artifact attestation
-      attestations: write
-    steps:
-      - uses: actions/download-artifact@v4
-      - name: Generate artifact attestation
-        uses: actions/attest-build-provenance@v1
-        with:
-          subject-path: 'wheels-*/*'
-      - name: Publish to PyPI
-        if: ${{ startsWith(github.ref, 'refs/tags/') }}
-        uses: PyO3/maturin-action@v1
-        env:
-          MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
-        with:
-          command: upload
-          args: --non-interactive --skip-existing wheels-*/*
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
new file mode 100644
index 0000000..6d55176
--- /dev/null
+++ b/.github/workflows/cd.yml
@@ -0,0 +1,30 @@
+name: cd
+
+on:
+  push:
+    tags:
+      - '**'
+
+jobs:
+  pypi_binwheels:
+    uses: ecmwf/reusable-workflows/.github/workflows/cd-pypi-binwheel.yml@v2
+    secrets: inherit
+    with:
+      platforms: "['ubuntu-latest','macos-latest','windows-latest']"
+      pyversions: "['311','312','313']"
+      env_vars: |
+        {
+          "USE_RUST": "1",
+          "SETUPTOOLS_RUST_CARGO_PROFILE": "release"
+        }
+
+  pypi_purepython:
+    needs: pypi_binwheels
+    uses: ecmwf/reusable-workflows/.github/workflows/cd-pypi.yml@v2
+    secrets: inherit
+    with:
+      env_vars: |
+        {
+          "USE_RUST": "0",
+          "SETUPTOOLS_RUST_CARGO_PROFILE": "release"
+        }
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index dc973c4..c2263dc 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -4,7 +4,6 @@ on:
     types: [opened, synchronize, reopened, closed]
   push:
     branches:
-      - "develop"
       - "main"
     tags:
       - "**"
@@ -55,7 +54,6 @@ jobs:
   publish:
     if: >-
       ${{ github.event_name == 'push' && (
-          github.ref_name == 'develop' ||
           github.ref_name == 'main' ||
           github.ref_type == 'tag'
       ) }}
diff --git a/.github/workflows/downstream-ci.yml b/.github/workflows/downstream-ci.yml
new file mode 100644
index 0000000..b4bbdaf
--- /dev/null
+++ b/.github/workflows/downstream-ci.yml
@@ -0,0 +1,39 @@
+name: ci
+
+on:
+  # Trigger the workflow on push to master or develop, except tag creation
+  push:
+    branches:
+      - "main"
+    tags-ignore:
+      - "**"
+
+  # Trigger the workflow on pull request
+  pull_request: ~
+
+  # Trigger the workflow manually
+  workflow_dispatch: ~
+
+  # Trigger after public PR approved for CI
+  pull_request_target:
+    types: [labeled]
+
+jobs:
+  # Run CI including downstream packages on self-hosted runners
+  downstream-ci:
+    name: downstream-ci
+    if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }}
+    uses: ecmwf/downstream-ci/.github/workflows/downstream-ci.yml@feat/qubed
+    with:
+      qubed: ecmwf/qubed@${{ github.event.pull_request.head.sha || github.sha }}
+      # codecov_upload: true
+      python_qa: true
+    secrets: inherit
+
+  downstream-ci-hpc:
+    name: downstream-ci-hpc
+    if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }}
+    uses: ecmwf/downstream-ci/.github/workflows/downstream-ci-hpc.yml@feat/qubed
+    with:
+      qubed: ecmwf/qubed@${{ github.event.pull_request.head.sha || github.sha }}
+    secrets: inherit
diff --git a/.github/workflows/label-public-pr.yml b/.github/workflows/label-public-pr.yml
new file mode 100644
index 0000000..59b2bfa
--- /dev/null
+++ b/.github/workflows/label-public-pr.yml
@@ -0,0 +1,10 @@
+# Manage labels of pull requests that originate from forks
+name: label-public-pr
+
+on:
+  pull_request_target:
+    types: [opened, synchronize]
+
+jobs:
+  label:
+    uses: ecmwf-actions/reusable-workflows/.github/workflows/label-pr.yml@v2
diff --git a/.github/workflows/test-pypi.yml b/.github/workflows/test-pypi.yml
new file mode 100644
index 0000000..7ef8081
--- /dev/null
+++ b/.github/workflows/test-pypi.yml
@@ -0,0 +1,31 @@
+
+name: test-cd
+
+on:
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  pypi_binwheels:
+    uses: ecmwf/reusable-workflows/.github/workflows/cd-pypi-binwheel.yml@v2
+    secrets: inherit
+    with:
+      platforms: "['ubuntu-latest','macos-latest']"
+      pyversions: "['311','312']"
+      testpypi: true
+      env_vars: |
+        {
+          "USE_RUST": "1",
+          "SETUPTOOLS_RUST_CARGO_PROFILE": "release"
+        }
+  pypi_purepython:
+    needs: pypi_binwheels
+    uses: ecmwf/reusable-workflows/.github/workflows/cd-pypi.yml@v2
+    secrets: inherit
+    with:
+      testpypi: true
+      env_vars: |
+        {
+          "USE_RUST": "0",
+          "SETUPTOOLS_RUST_CARGO_PROFILE": "release"
+        }
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
deleted file mode 100644
index c5adf1e..0000000
--- a/.github/workflows/test.yml
+++ /dev/null
@@ -1,52 +0,0 @@
-# This file is autogenerated by maturin v1.7.7
-# To update, run
-#
-#    maturin generate-ci github
-#
-name: Test
-
-on:
-  push:
-    branches:
-      - main
-      - develop
-  pull_request:
-  workflow_dispatch:
-
-
-permissions:
-  contents: read
-
-jobs:
-  test:
-    name: test with ${{ matrix.env }} on ${{ matrix.os }}
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        env:
-          - "3.13"
-          - "3.12"
-          - "3.11"
-        os:
-          - ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@v3
-
-      - name: Install tox
-        run: uv tool install --python-preference only-managed --python 3.13 tox --with tox-uv --with tox-gh
-      - name: Install Python
-        if: matrix.env != '3.13'
-        run: uv python install --python-preference only-managed ${{ matrix.env }}
-
-      - name: Setup test suite
-        run: tox run -vv --notest --skip-missing-interpreters false
-        env:
-          TOX_GH_MAJOR_MINOR: ${{ matrix.env }}
-
-      - name: Run test suite
-        run: tox run --skip-pkg-install
-        env:
-          TOX_GH_MAJOR_MINOR: ${{ matrix.env }}
diff --git a/.github/workflows/test_docs.yml b/.github/workflows/test_docs.yml
index 0a31f8f..945865a 100644
--- a/.github/workflows/test_docs.yml
+++ b/.github/workflows/test_docs.yml
@@ -4,7 +4,6 @@ on:
   push:
     branches:
       - main
-      - develop
   pull_request:
   workflow_dispatch:
 
diff --git a/.github/workflows/update_version.py b/.github/workflows/update_version.py
deleted file mode 100755
index b1fe53a..0000000
--- a/.github/workflows/update_version.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import re
-import subprocess
-from pathlib import Path
-
-CARGO_TOML_PATH = Path("Cargo.toml")
-
-
-# Get the latest Git tag and strip the leading 'v' if present
-def get_git_version():
-    try:
-        version = subprocess.check_output(
-            ["git", "describe", "--tags", "--always"], text=True
-        ).strip()
-        version = re.sub(r"^v", "", version)  # Remove leading 'v'
-        return version
-    except subprocess.CalledProcessError:
-        raise RuntimeError(
-            "Failed to get Git tag. Make sure you have at least one tag in the repository."
-        )
-
-
-# Update version in Cargo.toml
-def update_cargo_version(new_version):
-    cargo_toml = CARGO_TOML_PATH.read_text()
-
-    # Replace version in [package] section
-    updated_toml = re.sub(
-        r'^version = "[^"]+"',
-        f'version = "{new_version}"',
-        cargo_toml,
-        flags=re.MULTILINE,
-    )
-
-    CARGO_TOML_PATH.write_text(updated_toml)
-
-
-if __name__ == "__main__":
-    version = get_git_version()
-    print(f"Parsed version: {version}")
-
-    update_cargo_version(version)
-    print(f"Updated Cargo.toml with version: {version}")
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5fedb50..511afb3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -9,10 +9,20 @@ repos:
         exclude: "[^/.]+.cbor"
 #      - id: check-yaml
 #      - id: check-added-large-files
+#  - repo: https://github.com/pycqa/isort
+#    rev: 5.13.2
+#    hooks:
+#      - id: isort
+#        name: isort (python)
+
+#  - repo: https://github.com/psf/black
+#    rev: 24.8.0 # Use the latest version of black
+#    hooks:
+#      - id: black
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.7
+    rev: v0.13.1
     hooks:
-      - id: ruff
+      - id: ruff-check
         args: [ --fix ]
       - id: ruff-format
diff --git a/fdb_scanner/scan.py b/fdb_scanner/scan.py
index 8523738..ed474fe 100755
--- a/fdb_scanner/scan.py
+++ b/fdb_scanner/scan.py
@@ -21,7 +21,7 @@
 Climate DT Gen 1 (Done): --full --selector class=d1,dataset=climate-dt,generation=1 --filepath tests/example_qubes/climate-dt-gen-1.json
 
 Climate DT Gen 2 (Ongoing) Full Weekly Scan: --full --selector class=d1,dataset=climate-dt,generation=2 --filepath tests/example_qubes/climate-dt-gen-2.json
-Extremes DT Daily of last week: --last_n_days=7 --selector class=d1,dataset=extremes-dt --filepath tests/example_qubes/climate-dt.json 
+Extremes DT Daily of last week: --last_n_days=7 --selector class=d1,dataset=extremes-dt --filepath tests/example_qubes/climate-dt.json
 On Demand Extremes DT Full Daily scan: --full --selector class=d1,dataset=on-demand-extremes-dt --filepath tests/example_qubes/on-demand-extremes-dt.json
 
 Example crontab:
@@ -37,88 +37,91 @@
 # Climate dt gen 2 Weekly on sunday at 2am
 0 2 * * SUN cd /home/eouser/qubed && ./.venv/bin/python3.12 ./fdb_scanner/scan.py --quiet --full --selector class=d1,dataset=climate-dt,generation=2 --filepath tests/example_qubes/climate-dt-gen-2.json >> ./fdb_scanner/logs/climate-dt.log 2>&1
 """
+
+import argparse
 import json
+import os
 import subprocess
-from datetime import datetime, timedelta, date
+from datetime import date, datetime, timedelta
+from enum import Enum
+from pathlib import Path
 from time import time
+
 import psutil
-from qubed import Qube
-from tqdm import tqdm
 import requests
-import argparse
-import os
-from enum import Enum, auto
-from pathlib import Path
+
+from qubed import Qube
+
 
 class ScanMode(Enum):
     Full = "full"
     Partial = "partial"
 
+
 def parse_args():
-    parser = argparse.ArgumentParser(description='Convert data in an fdb into a qube (no metadata)')
-    
+    parser = argparse.ArgumentParser(
+        description="Convert data in an fdb into a qube (no metadata)"
+    )
+
     parser.add_argument(
-        '--selector',
+        "--selector",
         type=str,
-        help='Selector string eg class=d1,dataset=climate-dt,generation=1'
+        help="Selector string eg class=d1,dataset=climate-dt,generation=1",
     )
-    
+
     parser.add_argument(
-        '--filepath',
+        "--filepath",
         type=str,
-        help='Path to file (may not exist) eg tests/example_qubes/climate-dt-gen-1.json'
+        help="Path to file (may not exist) eg tests/example_qubes/climate-dt-gen-1.json",
    )
-    
+
     parser.add_argument(
-        '--api',
+        "--api",
         type=str,
         default="https://qubed.lumi.apps.dte.destination-earth.eu/api/v2",
-        help='API URL (default: %(default)s)'
+        help="API URL (default: %(default)s)",
     )
 
     parser.add_argument(
-        '--api-secret',
+        "--api-secret",
         type=str,
         default="config/api.secret",
-        help='API Secret (default: %(default)s)'
+        help="API Secret (default: %(default)s)",
     )
-    
+
     parser.add_argument(
-        '--fdb-config',
+        "--fdb-config",
         type=str,
         default="config/fdb_config.yaml",
-        help='Configuration file path (must exist) (default: %(default)s)'
+        help="Configuration file path (must exist) (default: %(default)s)",
     )
-    
-    parser.add_argument('--quiet', action="store_const", const=True, default=False)
+
+    parser.add_argument("--quiet", action="store_const", const=True, default=False)
 
     # Mutually exclusive group for --full/--last_n_days
     mode_group = parser.add_mutually_exclusive_group()
     mode_group.add_argument(
-        '--full',
-        action='store_const',
+        "--full",
+        action="store_const",
         const=ScanMode.Full,
-        dest='full_or_partial',
-        help='Do a full scan (default)'
-    )
-    mode_group.add_argument(
-        '--last_n_days',
-        type=int,
-        help='Scan the last n days'
+        dest="full_or_partial",
+        help="Do a full scan (default)",
     )
-    
+    mode_group.add_argument("--last_n_days", type=int, help="Scan the last n days")
+
     # Set default for full_or_partial
     parser.set_defaults(full_or_partial=ScanMode.Partial)
-    
+
     args = parser.parse_args()
-    
+
     if not os.path.exists(args.fdb_config):
         parser.error(f"Configuration file does not exist: {args.fdb_config}")
 
     if not os.path.exists(args.api_secret):
         parser.error(f"API secrets file does not exist: {args.api_secret}")
-    
+
     return args
 
+
 args = parse_args()
 
 process = psutil.Process()
@@ -129,11 +132,12 @@ def parse_args():
 def from_ecmwf_date(s: str) -> date:
     return datetime.strptime(s, "%Y%m%d").date()
 
+
 def to_ecmwf_date(d: date) -> str:
     return d.strftime("%Y%m%d")
 
 
-def run_command(command: list[str]) -> str: 
+def run_command(command: list[str]) -> str:
     return subprocess.run(
         command,
         text=True,
@@ -143,11 +147,16 @@ def run_command(command: list[str]) -> str:
         check=True,
     ).stdout
 
+
 start_time = datetime.now()
 print(f"Running scan at {start_time}")
 
 # Use fdb axes to determine date range
-output = run_command([f"/usr/local/bin/fdb axes --json --config {args.fdb_config} --minimum-keys=class {args.selector}"])
+output = run_command(
+    [
+        f"/usr/local/bin/fdb axes --json --config {args.fdb_config} --minimum-keys=class {args.selector}"
+    ]
+)
 axes = json.loads(output)
 dates = [from_ecmwf_date(s) for s in axes["date"]]
 dataset_start_date, dataset_end_date = min(dates), max(dates)
@@ -165,7 +174,8 @@ def run_command(command: list[str]) -> str:
 end_date = min(dataset_end_date, requested_end_date)
 dates_in_range = [d for d in dates if start_date <= d < end_date]
 
-print(f"""
+print(
+    f"""
 Doing a {args.full_or_partial.value} scan of the dataset
     Selector: {args.selector}
     Requested date range: {start_date} - {end_date}
@@ -176,8 +186,9 @@ def run_command(command: list[str]) -> str:
     Full dataset date range: {dataset_start_date} - {dataset_end_date}
     Unique dates in that range: {len(dates)}
 
-    Estimated scan time (Assuming 120 day chunk size) (hh:mm::ss): {len(dates_in_range) * timedelta(seconds=1.12) + timedelta(seconds=24)} 
-""")
+    Estimated scan time (Assuming 120 day chunk size) (hh:mm::ss): {len(dates_in_range) * timedelta(seconds=1.12) + timedelta(seconds=24)}
+"""
+)
 
 current_span: tuple[date, date] = (end_date - chunk_size, end_date)
 qube = Qube.empty()
@@ -196,7 +207,7 @@ def run_command(command: list[str]) -> str:
     print(f"Running command {command[0]}")
     print(f"Doing {current_span[0]} - {current_span[1]}")
     print(f"Current memory usage: {process.memory_info().rss / 1e9:.2g}GB")
-    
+
     try:
         stdout = run_command(command)
     except Exception as e:
@@ -210,7 +221,6 @@ def run_command(command: list[str]) -> str:
 
         def split(t):
             return t[0], t[1].split("/")
-
         request = dict(split(v.split("=")) for v in line.strip().split(","))
 
         # Remove year and month from request
@@ -218,37 +228,58 @@ def run_command(command: list[str]) -> str:
         request.pop("month", None)
 
         # Order the keys
-        key_order = ["class", "dataset", "stream", "activity", "resolution", "expver", "experiment", "generation", "model", "realization", "type", "date", "time", "datetime", "levtype", "levelist", "step", "param"]
-        request = {k : request[k] for k in key_order if k in request}
-
-        q = (Qube.from_datacube(request)
-            .convert_dtypes({
-                "generation": int,
-                "realization": int,
-                "param": int,
-                "date": lambda s: datetime.strptime(s, "%Y%m%d").date()})
-            )
+        key_order = [
+            "class",
+            "dataset",
+            "stream",
+            "activity",
+            "resolution",
+            "expver",
+            "experiment",
+            "generation",
+            "model",
+            "realization",
+            "type",
+            "date",
+            "time",
+            "datetime",
+            "levtype",
+            "levelist",
+            "step",
+            "param",
+        ]
+        request = {k: request[k] for k in key_order if k in request}
+
+        q = Qube.from_datacube(request).convert_dtypes(
+            {
+                "generation": int,
+                "realization": int,
+                "param": int,
+                "date": lambda s: datetime.strptime(s, "%Y%m%d").date(),
+            }
+        )
 
         subqube = subqube | q
-    
-    if not args.quiet: subqube.print(depth=2)
+
+    if not args.quiet:
+        subqube.print(depth=2)
 
     qube = qube | subqube
     if not args.quiet:
-        print(f"{subqube.n_nodes = }, {subqube.n_leaves = },")
+        print(f"{subqube.n_nodes=}, {subqube.n_leaves=},")
         print("Added to qube")
-        print(f"{qube.n_nodes = }, {qube.n_leaves = },")
-    
+        print(f"{qube.n_nodes=}, {qube.n_leaves=},")
 
     # Send to the API
     r = requests.post(
-        args.api + "/union/",
-        headers = {"Authorization" : f"Bearer {secret}"},
-        json = subqube.to_json())
-    
-    if not args.quiet: 
+        args.api + "/union/",
+        headers={"Authorization": f"Bearer {secret}"},
+        json=subqube.to_json(),
+    )
+
+    if not args.quiet:
         print(f"Sent to server and got {r}")
 
     print(
-        f"Did that taking {(time() - t0) / chunk_size.days:2g} seconds per day ingested, total {(time() - t0):2g}s\n"    
+        f"Did that taking {(time() - t0) / chunk_size.days:2g} seconds per day ingested, total {(time() - t0):2g}s\n"
     )
 
     current_span = (current_span[0] - chunk_size, current_span[0])
@@ -259,7 +290,7 @@ def split(t):
 # Load in the existing qube from disk
 try:
     existing_qube = Qube.load(args.filepath)
-except:
+except Exception:
     print(f"Could not load {args.filepath}!")
     existing_qube = Qube.empty()
 
@@ -285,12 +316,14 @@ def split(t):
 
 # Delete the temporary file
 tmp_file = Path(args.filepath + ".tmp")
-if tmp_file.exists(): tmp_file.unlink()
+if tmp_file.exists():
+    tmp_file.unlink()
 
 # Upload the whole thing to the API
 r = requests.post(
-    args.api + "/union/",
-    headers = {"Authorization" : f"Bearer {secret}"},
-    json = qube.to_json())
+    args.api + "/union/",
+    headers={"Authorization": f"Bearer {secret}"},
+    json=qube.to_json(),
+)
 
-print(f"Done in {datetime.now() - start_time}!")
\ No newline at end of file
+print(f"Done in {datetime.now() - start_time}!") diff --git a/fdb_scanner/upload_all.py b/fdb_scanner/upload_all.py index 6188b8d..ee79496 100644 --- a/fdb_scanner/upload_all.py +++ b/fdb_scanner/upload_all.py @@ -1,47 +1,46 @@ -import json -import subprocess -from datetime import datetime, timedelta, date -from time import time -import psutil -from qubed import Qube -import requests import argparse +import requests + +from qubed import Qube + + def parse_args(): - parser = argparse.ArgumentParser(description='Upload all qubes to the api') + parser = argparse.ArgumentParser(description="Upload all qubes to the api") parser.add_argument( - '--api', + "--api", type=str, default="https://qubed.lumi.apps.dte.destination-earth.eu/api/v2", - help='API URL (default: %(default)s)' + help="API URL (default: %(default)s)", ) parser.add_argument( - '--api-secret', + "--api-secret", type=str, default="config/api.secret", - help='API Secret (default: %(default)s)' + help="API Secret (default: %(default)s)", ) - - - args = parser.parse_args() return args + args = parse_args() +with open(args.api_secret, "r") as f: + secret = f.read() filepaths = [] for f in filepaths: -try: - qube = Qube.load(args.filepath) -except: - print(f"Could not load {args.filepath}, using empty qube.") - qube = Qube.empty() + try: + qube = Qube.load(args.filepath) + except Exception: + print(f"Could not load {args.filepath}, using empty qube.") + qube = Qube.empty() -r = requests.post( + r = requests.post( args.api + "/union/", - headers = {"Authorization" : f"Bearer {secret}"}, - json = qube.to_json()) \ No newline at end of file + headers={"Authorization": f"Bearer {secret}"}, + json=qube.to_json(), + ) diff --git a/pyproject.toml b/pyproject.toml index 9bc7e9e..6d18f31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -requires-python = ">= 3.11" +requires-python = ">= 3.10" dynamic = ["version"] dependencies = [ "frozendict", diff --git a/src/python/qubed/Qube.py b/src/python/qubed/Qube.py index de3e826..84d807b 100644 --- a/src/python/qubed/Qube.py +++ b/src/python/qubed/Qube.py @@ -16,13 +16,7 @@ from frozendict import frozendict from . 
diff --git a/src/python/qubed/Qube.py b/src/python/qubed/Qube.py
index de3e826..84d807b 100644
--- a/src/python/qubed/Qube.py
+++ b/src/python/qubed/Qube.py
@@ -16,13 +16,7 @@
 from frozendict import frozendict
 
 from . import set_operations
-from .formatters import (
-    HTML,
-    _display,
-    info,
-    node_tree_to_html,
-    node_tree_to_string,
-)
+from .formatters import HTML, _display, info, node_tree_to_html, node_tree_to_string
 from .metadata import add_metadata, from_nodes, leaves_with_metadata, metadata_info
 from .protobuf.adapters import from_protobuf, to_protobuf
 from .selection import SelectMode, select
@@ -40,11 +34,7 @@
     to_json,
 )
 from .types import NodeType
-from .value_types import (
-    QEnum,
-    ValueGroup,
-    WildcardGroup,
-)
+from .value_types import QEnum, ValueGroup, WildcardGroup
 
 
 @dataclass
@@ -559,7 +549,7 @@ def compare_metadata(self, B: Qube) -> bool:
             return False
         for k in self.metadata.keys():
             if k not in B.metadata:
-                print(f"'{k}' not in {B.metadata.keys() = }")
+                print(f"'{k}' not in {B.metadata.keys()=}")
                 return False
             if not np.array_equal(self.metadata[k], B.metadata[k]):
                 print(f"self.metadata[{k}] != B.metadata.[{k}]")
diff --git a/src/python/qubed/formatters.py b/src/python/qubed/formatters.py
index 8061993..386015a 100644
--- a/src/python/qubed/formatters.py
+++ b/src/python/qubed/formatters.py
@@ -352,7 +352,8 @@ def info(qube: Qube):
 {metadata}
     """
 
-    print(f"""
+    print(
+        f"""
     This qube has
     {humanize.intword(qube.n_nodes)} nodes
     {humanize.intword(qube.n_leaves)} individual leaves
@@ -362,4 +363,5 @@ def info(qube: Qube):
     --- Axes Info -------
 {axes}
 {metadata}
-    """)
+    """
+    )
diff --git a/src/python/qubed/metadata.py b/src/python/qubed/metadata.py
index b20fa2a..55d2626 100644
--- a/src/python/qubed/metadata.py
+++ b/src/python/qubed/metadata.py
@@ -35,9 +35,11 @@ def make_node(
         return cls.make_node(
             key=key,
             values=QEnum(values),
-            metadata={k: to_numpy_array(v, shape) for k, v in metadata.items()}
-            if metadata is not None
-            else {},
+            metadata=(
+                {k: to_numpy_array(v, shape) for k, v in metadata.items()}
+                if metadata is not None
+                else {}
+            ),
             children=children,
         )
 
diff --git a/src/python/qubed/protobuf/qube_pb2.pyi b/src/python/qubed/protobuf/qube_pb2.pyi
index 2c0e994..8fba700 100644
--- a/src/python/qubed/protobuf/qube_pb2.pyi
+++ b/src/python/qubed/protobuf/qube_pb2.pyi
@@ -1,14 +1,13 @@
-from google.protobuf.internal import containers as _containers
-from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
+from typing import ClassVar as _ClassVar
+from typing import Iterable as _Iterable
+from typing import Mapping as _Mapping
+from typing import Optional as _Optional
+from typing import Union as _Union
+
 from google.protobuf import descriptor as _descriptor
 from google.protobuf import message as _message
-from typing import (
-    ClassVar as _ClassVar,
-    Iterable as _Iterable,
-    Mapping as _Mapping,
-    Optional as _Optional,
-    Union as _Union,
-)
+from google.protobuf.internal import containers as _containers
+from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
 
 DESCRIPTOR: _descriptor.FileDescriptor
 
@@ -63,6 +62,7 @@ class MetadataGroup(_message.Message):
 
 class Qube(_message.Message):
     __slots__ = ("key", "values", "type", "metadata", "children")
+
     class MetadataEntry(_message.Message):
         __slots__ = ("key", "value")
         KEY_FIELD_NUMBER: _ClassVar[int]
json["metadata"].items()} - ) - if "metadata" in json - else {}, + metadata=( + frozendict({k: numpy_from_json(v) for k, v in json["metadata"].items()}) + if "metadata" in json + else {} + ), children=children, ) @@ -266,11 +266,11 @@ def from_cbor(json: dict, depth=0) -> Qube: key=json["key"], values=values_from_json(json["values"]), type=type, - metadata=frozendict( - {k: numpy_from_cbor(v) for k, v in json["metadata"].items()} - ) - if "metadata" in json - else {}, + metadata=( + frozendict({k: numpy_from_cbor(v) for k, v in json["metadata"].items()}) + if "metadata" in json + else {} + ), children=children, ) diff --git a/stac_server/main.py b/stac_server/main.py index 773e91a..cf4fab9 100644 --- a/stac_server/main.py +++ b/stac_server/main.py @@ -11,6 +11,7 @@ from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates from markupsafe import Markup + from qubed import Qube from qubed.formatters import node_tree_to_html @@ -209,9 +210,9 @@ def make_link(child_request): stac_collection = { "type": "Catalog", "stac_version": "1.0.0", - "id": "root" - if not request - else "/".join(f"{k}={v}" for k, v in request.items()), + "id": ( + "root" if not request else "/".join(f"{k}={v}" for k, v in request.items()) + ), "title": f"{this_key}={this_value}", "description": value_info, "links": [make_link(leaf) for leaf in q.leaves()], diff --git a/test_scripts/get_climate_dt_metadata.py b/test_scripts/get_climate_dt_metadata.py index 1e7b093..7069951 100644 --- a/test_scripts/get_climate_dt_metadata.py +++ b/test_scripts/get_climate_dt_metadata.py @@ -1,29 +1,25 @@ # Scan the last 7 days of the extremes dt once a day -import os -os.environ["ECCODES_PYTHON_USE_FINDLIBS"] = "1" -os.environ["FDB5_HOME"] = "/home/eouser/fdb_bundle/build" - import json -import subprocess -from datetime import datetime, timedelta -from time import time +import os +import sys +from datetime import datetime import psutil -from qubed import Qube -from tqdm import tqdm -import requests import pyfdb import yaml -import sys -from pathlib import Path +from qubed import Qube + +os.environ["ECCODES_PYTHON_USE_FINDLIBS"] = "1" +os.environ["FDB5_HOME"] = "/home/eouser/fdb_bundle/build" + process = psutil.Process() SELECTOR = { - "class" : "d1", - "dataset" : "climate-dt", + "class": "d1", + "dataset": "climate-dt", "year": "2025", - "month": "4/5/6/7/8/9/10/11/12" + "month": "4/5/6/7/8/9/10/11/12", } FILEPATH = "tests/example_qubes/climate-dt_with_metadata_one_year_2025.json" API = "https://qubed-dev-.lumi.apps.dte.destination-earth.eu/api/v2" @@ -37,9 +33,11 @@ def from_ecmwf_date(s: str) -> datetime: return datetime.strptime(s, "%Y%m%d") + def to_ecmwf_date(d: datetime) -> str: return d.strftime("%Y%m%d") + with open(CONFIG) as f: config = yaml.safe_load(f) @@ -47,7 +45,7 @@ def to_ecmwf_date(d: datetime) -> str: try: qube = Qube.load(FILEPATH) -except: +except Exception: print(f"Could not load {FILEPATH}, using empty qube.") qube = Qube.empty() @@ -62,34 +60,53 @@ def to_ecmwf_date(d: datetime) -> str: # time = request.pop("time") # request["datetime"] = datetime.strptime(date + time, "%Y%m%d%H%M") - key_order = ["class", "dataset", "stream", "activity", "resolution", "expver", "experiment", "generation", "model", "realization", "type", "datetime", "date", "time", "levtype", "levelist", "step", "param"] - request = {k : request[k] for k in key_order if k in request} + key_order = [ + "class", + "dataset", + "stream", + "activity", + "resolution", + "expver", + "experiment", + "generation", + 
"model", + "realization", + "type", + "datetime", + "date", + "time", + "levtype", + "levelist", + "step", + "param", + ] + request = {k: request[k] for k in key_order if k in request} # Split path into three parts # p = Path(metadata.pop("path")) # part_0 = p.parents[1] # part_1 = p.parents[0].relative_to(part_0) # part_2 = p.name - + # metadata["path_0"] = str(part_0) # metadata["path_1"] = str(part_1) # metadata["path_2"] = str(part_2) - - q = ( Qube.from_datacube(request) .add_metadata(metadata) - .convert_dtypes({ - "generation": int, - "realization": int, - "param": int, - "date": lambda s: datetime.strptime(s, "%Y%m%d").date() - }) + .convert_dtypes( + { + "generation": int, + "realization": int, + "param": int, + "date": lambda s: datetime.strptime(s, "%Y%m%d").date(), + } ) + ) qube = qube | q - if i % 5000 == 0: + if i % 5000 == 0: print(i, request, metadata) with open(FILEPATH, "w") as f: json.dump(qube.to_json(), f) @@ -102,4 +119,4 @@ def to_ecmwf_date(d: datetime) -> str: json.dump(qube.to_json(), f) sys.exit() -print("done") \ No newline at end of file +print("done") diff --git a/test_scripts/get_extremes_metadata.py b/test_scripts/get_extremes_metadata.py index 159a33f..cc1c5ba 100644 --- a/test_scripts/get_extremes_metadata.py +++ b/test_scripts/get_extremes_metadata.py @@ -1,27 +1,23 @@ # Scan the last 7 days of the extremes dt once a day -import os -os.environ["ECCODES_PYTHON_USE_FINDLIBS"] = "1" -os.environ["FDB5_HOME"] = "/home/eouser/fdb_bundle/build" - import json -import subprocess -from datetime import datetime, timedelta -from time import time +import os +import sys +from datetime import datetime import psutil -from qubed import Qube -from tqdm import tqdm -import requests import pyfdb import yaml -import sys -from pathlib import Path +from qubed import Qube + +os.environ["ECCODES_PYTHON_USE_FINDLIBS"] = "1" +os.environ["FDB5_HOME"] = "/home/eouser/fdb_bundle/build" + process = psutil.Process() SELECTOR = { - "class" : "d1", - "dataset" : "extremes-dt", + "class": "d1", + "dataset": "extremes-dt", } FILEPATH = "tests/example_qubes/extremes-dt_with_metadata.json" API = "https://qubed-dev.lumi.apps.dte.destination-earth.eu/api/v2" @@ -39,7 +35,7 @@ try: qube = Qube.load(FILEPATH) -except: +except Exception: print(f"Could not load {FILEPATH}, using empty qube.") qube = Qube.empty() @@ -50,37 +46,56 @@ request.pop("month", None) date = request.pop("date") - time = request.pop("time") - request["datetime"] = datetime.strptime(date + time, "%Y%m%d%H%M") - - key_order = ["class", "dataset", "stream", "activity", "resolution", "expver", "experiment", "generation", "model", "realization", "type", "datetime", "date", "time", "levtype", "levelist", "step", "param"] - request = {k : request[k] for k in key_order if k in request} + req_time = request.pop("time") + request["datetime"] = datetime.strptime(date + req_time, "%Y%m%d%H%M") + + key_order = [ + "class", + "dataset", + "stream", + "activity", + "resolution", + "expver", + "experiment", + "generation", + "model", + "realization", + "type", + "datetime", + "date", + "time", + "levtype", + "levelist", + "step", + "param", + ] + request = {k: request[k] for k in key_order if k in request} # Split path into three parts # p = Path(metadata.pop("path")) # part_0 = p.parents[1] # part_1 = p.parents[0].relative_to(part_0) # part_2 = p.name - + # metadata["path_0"] = str(part_0) # metadata["path_1"] = str(part_1) # metadata["path_2"] = str(part_2) - - q = ( Qube.from_datacube(request) .add_metadata(metadata) - .convert_dtypes({ 
diff --git a/test_scripts/get_ondemand_metadata.py b/test_scripts/get_ondemand_metadata.py
index 9ce0ec2..7549ae4 100644
--- a/test_scripts/get_ondemand_metadata.py
+++ b/test_scripts/get_ondemand_metadata.py
@@ -1,27 +1,23 @@
 # Scan the last 7 days of the extremes dt once a day
-import os
-os.environ["ECCODES_PYTHON_USE_FINDLIBS"] = "1"
-os.environ["FDB5_HOME"] = "/home/eouser/fdb_bundle/build"
-
 import json
-import subprocess
-from datetime import datetime, timedelta
-from time import time
+import os
+import sys
+from datetime import datetime
+
+os.environ["ECCODES_PYTHON_USE_FINDLIBS"] = "1"
+os.environ["FDB5_HOME"] = "/home/eouser/fdb_bundle/build"
 
 import psutil
-from qubed import Qube
-from tqdm import tqdm
-import requests
 import pyfdb
 import yaml
-import sys
-from pathlib import Path
+
+from qubed import Qube
 
 process = psutil.Process()
 
 SELECTOR = {
-    "class" : "d1",
-    "dataset" : "on-demand-extremes-dt",
+    "class": "d1",
+    "dataset": "on-demand-extremes-dt",
 }
 FILEPATH = "tests/example_qubes/on-demand-extremes-dt_with_metadata.json"
 API = "https://qubed-dev.lumi.apps.dte.destination-earth.eu/api/v2"
@@ -35,9 +31,11 @@
 def from_ecmwf_date(s: str) -> datetime:
     return datetime.strptime(s, "%Y%m%d")
 
+
 def to_ecmwf_date(d: datetime) -> str:
     return d.strftime("%Y%m%d")
 
+
 with open(CONFIG) as f:
     config = yaml.safe_load(f)
 
@@ -45,7 +43,7 @@ def to_ecmwf_date(d: datetime) -> str:
 
 try:
     qube = Qube.load(FILEPATH)
-except:
+except Exception:
     print(f"Could not load {FILEPATH}, using empty qube.")
     qube = Qube.empty()
 
@@ -56,37 +54,56 @@ def to_ecmwf_date(d: datetime) -> str:
     request.pop("month", None)
 
     date = request.pop("date")
-    time = request.pop("time")
-    request["datetime"] = datetime.strptime(date + time, "%Y%m%d%H%M")
-
-    key_order = ["class", "dataset", "stream", "activity", "resolution", "expver", "experiment", "generation", "model", "realization", "type", "datetime", "date", "time", "levtype", "levelist", "step", "param"]
-    request = {k : request[k] for k in key_order if k in request}
+    req_time = request.pop("time")
+    request["datetime"] = datetime.strptime(date + req_time, "%Y%m%d%H%M")
+
+    key_order = [
+        "class",
+        "dataset",
+        "stream",
+        "activity",
+        "resolution",
+        "expver",
+        "experiment",
+        "generation",
+        "model",
+        "realization",
+        "type",
+        "datetime",
+        "date",
+        "time",
+        "levtype",
+        "levelist",
+        "step",
+        "param",
+    ]
+    request = {k: request[k] for k in key_order if k in request}
 
     # Split path into three parts
     # p = Path(metadata.pop("path"))
     # part_0 = p.parents[1]
     # part_1 = p.parents[0].relative_to(part_0)
     # part_2 = p.name
-    
+
     # metadata["path_0"] = str(part_0)
     # metadata["path_1"] = str(part_1)
     # metadata["path_2"] = str(part_2)
-    
-    q = (
+
+    q = (
         Qube.from_datacube(request)
         .add_metadata(metadata)
-        .convert_dtypes({
-            "generation": int,
-            "realization": int,
-            "param": int,
-            "date": lambda s: datetime.strptime(s, "%Y%m%d").date()
-        })
+        .convert_dtypes(
+            {
+                "generation": int,
+                "realization": int,
+                "param": int,
+                "date": lambda s: datetime.strptime(s, "%Y%m%d").date(),
+            }
        )
+    )
 
     qube = qube | q
-    if i % 5000 == 0: 
+    if i % 5000 == 0:
         print(i, request, metadata)
         with open(FILEPATH, "w") as f:
             json.dump(qube.to_json(), f)
@@ -99,4 +116,4 @@ def to_ecmwf_date(d: datetime) -> str:
         json.dump(qube.to_json(), f)
 
 sys.exit()
-print("done")
\ No newline at end of file
+print("done")
diff --git a/test_scripts/update_climate_dt.py b/test_scripts/update_climate_dt.py
index 98c1373..f3fc72a 100644
--- a/test_scripts/update_climate_dt.py
+++ b/test_scripts/update_climate_dt.py
@@ -5,10 +5,10 @@
 from time import time
 
 import psutil
-from qubed import Qube
-from tqdm import tqdm
 import requests
-import sys
+from tqdm import tqdm
+
+from qubed import Qube
 
 process = psutil.Process()
 SELECTOR = "class=d1,dataset=climate-dt"
@@ -24,6 +24,7 @@
 def from_ecmwf_date(s: str) -> datetime:
     return datetime.strptime(s, "%Y%m%d")
 
+
 def to_ecmwf_date(d: datetime) -> str:
     return d.strftime("%Y%m%d")
 
@@ -31,9 +32,7 @@ def to_ecmwf_date(d: datetime) -> str:
 if FULL_OR_PARTIAL == "FULL":
     # Full scan
     CHUNK_SIZE = timedelta(days=120)
-    command = [
-        f"fdb axes --json --config {CONFIG} --minimum-keys=class {SELECTOR}"
-    ]
+    command = [f"fdb axes --json --config {CONFIG} --minimum-keys=class {SELECTOR}"]
 
     p = subprocess.run(
         command,
@@ -47,8 +46,10 @@ def to_ecmwf_date(d: datetime) -> str:
     dates = [from_ecmwf_date(s) for s in axes["date"]]
     start_date = min(dates)
     end_date = max(dates)
-    
-    print(f"Used fdb axes to determine full date range of data to be: {start_date} - {end_date}")
+
+    print(
+        f"Used fdb axes to determine full date range of data to be: {start_date} - {end_date}"
+    )
 
 else:
     # Partial scan
@@ -60,12 +61,11 @@ def to_ecmwf_date(d: datetime) -> str:
 
 try:
     qube = Qube.load(FILEPATH)
-except:
+except Exception:
     print(f"Could not load {FILEPATH}, using empty qube.")
     qube = Qube.empty()
 
-
 while current_span[0] > start_date:
     t0 = time()
     start, end = map(to_ecmwf_date, current_span)
@@ -97,33 +97,52 @@ def to_ecmwf_date(d: datetime) -> str:
 
         def split(t):
             return t[0], t[1].split("/")
-
         request = dict(split(v.split("=")) for v in line.strip().split(","))
 
         request.pop("year", None)
         request.pop("month", None)
 
-        key_order = ["class", "dataset", "stream", "activity", "resolution", "expver", "experiment", "generation", "model", "realization", "type", "date", "time", "levtype", "levelist", "step", "param"]
-        request = {k : request[k] for k in key_order if k in request}
-
-        q = (Qube.from_datacube(request)
-            .convert_dtypes({
-                "generation": int,
-                "realization": int,
-                "param": int,
-                "date": lambda s: datetime.strptime(s, "%Y%m%d")})
-            )
+        key_order = [
+            "class",
+            "dataset",
+            "stream",
+            "activity",
+            "resolution",
+            "expver",
+            "experiment",
+            "generation",
+            "model",
+            "realization",
+            "type",
+            "date",
+            "time",
+            "levtype",
+            "levelist",
+            "step",
+            "param",
+        ]
+        request = {k: request[k] for k in key_order if k in request}
+
+        q = Qube.from_datacube(request).convert_dtypes(
+            {
+                "generation": int,
+                "realization": int,
+                "param": int,
+                "date": lambda s: datetime.strptime(s, "%Y%m%d"),
+            }
+        )
 
         subqube = subqube | q
-    
+
     subqube.print(depth=2)
-    print(f"{subqube.n_nodes = }, {subqube.n_leaves = },")
+    print(f"{subqube.n_nodes=}, {subqube.n_leaves=},")
     print("added to qube")
     qube = qube | subqube
-    print(f"{qube.n_nodes = }, {qube.n_leaves = },")
+    print(f"{qube.n_nodes=}, {qube.n_leaves=},")
 
     r = requests.post(
-        API + "/union/",
-        headers = {"Authorization" : f"Bearer {secret}"},
-        json = subqube.to_json())
+        API + "/union/",
+        headers={"Authorization": f"Bearer {secret}"},
+        json=subqube.to_json(),
+    )
     print(f"sent to server and got {r}")
 
     current_span = [current_span[0] - CHUNK_SIZE, current_span[0]]
headers={"Authorization": f"Bearer {secret}"}, + json=subqube.to_json(), + ) print(f"sent to server and got {r}") current_span = [current_span[0] - CHUNK_SIZE, current_span[0]] diff --git a/tests/test_basic_operations.py b/tests/test_basic_operations.py index 0e1eabe..28d7457 100644 --- a/tests/test_basic_operations.py +++ b/tests/test_basic_operations.py @@ -1,9 +1,11 @@ from datetime import datetime import pytest + from qubed import Qube -q = Qube.from_tree(""" +q = Qube.from_tree( + """ root ├── class=od │ ├── expver=0001 @@ -20,11 +22,13 @@ └── expver=0002 ├── param=1 └── param=2 -""") +""" +) def test_getitem(): - assert q["class", "od"] == Qube.from_tree(""" + assert q["class", "od"] == Qube.from_tree( + """ root ├── expver=0001 │ ├── param=1 @@ -32,12 +36,15 @@ def test_getitem(): └── expver=0002 ├── param=1 └── param=2 -""") +""" + ) - assert q["class", "od"]["expver", "0001"] == Qube.from_tree(""" + assert q["class", "od"]["expver", "0001"] == Qube.from_tree( + """ root ├── param=1 -└── param=2""") +└── param=2""" + ) def test_n_leaves(): @@ -193,13 +200,15 @@ def test_value_dtypes(): def test_flattten(): - q = Qube.from_tree(""" + q = Qube.from_tree( + """ root ├── class=od, expver=0001/0002, param=1/2 └── class=rd ├── expver=0001, param=1/2/3 └── expver=0002, param=1/2 - """) + """ + ) assert q.flatten() == Qube.from_json( { @@ -298,12 +307,14 @@ def test_flattten(): def test_invalid_from_dict(): with pytest.raises(ValueError): - q = Qube.from_tree(""" + q = Qube.from_tree( + """ root ├── class=od, expver=0001/0002, param=1/2 └── class=rd ├── expver=0001, param=1/2/3 └── expver=0002, param=1/2 - """) + """ + ) q.flatten().to_dict() diff --git a/tests/test_iteration.py b/tests/test_iteration.py index 0e9ad03..02025af 100644 --- a/tests/test_iteration.py +++ b/tests/test_iteration.py @@ -1,4 +1,5 @@ from frozendict import frozendict + from qubed import Qube @@ -19,13 +20,15 @@ def make_hashable(list_like): def test_datacubes(): - q = Qube.from_tree(""" + q = Qube.from_tree( + """ root, class=d1 ├── date=19920101/19930101/19940101, params=1/2/3 └── date=19950101 ├── level=1/2/3, params=1/2/3/4 └── params=1/2/3/4 - """) + """ + ) assert len(list(q.datacubes())) == 3 assert list(q.datacubes()) == [ diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 6772a82..639df76 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -5,6 +5,7 @@ import numpy as np import pytest from frozendict import frozendict + from qubed import Qube diff --git a/tests/test_rust.py b/tests/test_rust.py index 65a8513..eddfa01 100644 --- a/tests/test_rust.py +++ b/tests/test_rust.py @@ -1,9 +1,7 @@ from __future__ import annotations - from qubed.rust import Qube as Qube - # def test_from_json(): # q = pyQube.from_tree(""" # root, class=d1