diff --git a/.github/workflows/integration-cloud.yml b/.github/workflows/integration-cloud.yml index c19df77f8..b3801085e 100644 --- a/.github/workflows/integration-cloud.yml +++ b/.github/workflows/integration-cloud.yml @@ -20,38 +20,38 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 - id: setup-python + - name: Install uv + uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6 with: + version: "0.6.16" python-version: "3.10" - cache: "pip" - - run: echo '${{ steps.setup-python.outputs.cache-hit }}' # true if cache-hit occurred on the primary key + enable-cache: true + - name: Install dependencies run: | - pip install -e .[dev] + uv sync --extra dev - name: Run integration tests env: VESPA_TEAM_API_KEY: ${{ secrets.VESPA_TEAM_API_KEY }} run: | - pytest tests/integration/test_integration_vespa_cloud.py -s -v + uv run pytest tests/integration/test_integration_vespa_cloud.py -s -v integration-cloud-token: runs-on: ubuntu-latest steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 - id: setup-python + - name: Install uv + uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6 with: + version: "0.6.16" python-version: "3.10" - cache: "pip" - - run: echo '${{ steps.setup-python.outputs.cache-hit }}' # true if cache-hit occurred on the primary key + enable-cache: true + - name: Install dependencies run: | - pip install -e .[dev] + uv sync --extra dev - name: Run integration tests env: @@ -59,26 +59,25 @@ jobs: VESPA_CLOUD_SECRET_TOKEN: ${{ secrets.VESPA_CLOUD_SECRET_TOKEN }} VESPA_CLIENT_TOKEN_ID: ${{ secrets.VESPA_CLIENT_TOKEN_ID }} run: | - pytest tests/integration/test_integration_vespa_cloud_token.py -s -v + uv run pytest tests/integration/test_integration_vespa_cloud_token.py 
-s -v integration-cloud-vector-search: runs-on: ubuntu-latest steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python - id: setup-python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Install uv + uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6 with: + version: "0.6.16" python-version: "3.10" - cache: "pip" - - run: echo '${{ steps.setup-python.outputs.cache-hit }}' # true if cache-hit occurred on the primary key + enable-cache: true - name: Install dependencies run: | - pip install -e .[dev] + uv sync --extra dev - name: Run integration tests env: VESPA_TEAM_API_KEY: ${{ secrets.VESPA_TEAM_API_KEY }} run: | - pytest tests/integration/test_integration_vespa_cloud_vector_search.py -s -v + uv run pytest tests/integration/test_integration_vespa_cloud_vector_search.py -s -v diff --git a/.github/workflows/integration-except-cloud.yml b/.github/workflows/integration-except-cloud.yml index 40932f1a7..2eb30288d 100644 --- a/.github/workflows/integration-except-cloud.yml +++ b/.github/workflows/integration-except-cloud.yml @@ -5,6 +5,7 @@ on: pull_request: branches: - master + paths: [".github/workflows/integration-except-cloud.yml"] push: branches: - master @@ -27,18 +28,17 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Install uv + uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6 with: - python-version: "3.9" - cache: "pip" - cache-dependency-path: | - pyproject.toml + version: "0.6.16" + python-version: "3.10" + enable-cache: true - name: Install dependencies run: | - pip install -e .[dev] + uv sync --extra dev - name: Run integration test run: | - pytest tests/integration/${{ matrix.test-suite }} -s -v + uv run pytest tests/integration/${{ matrix.test-suite }} -s -v diff --git a/.github/workflows/mktestdocs.yml 
b/.github/workflows/mktestdocs.yml index c9cff29ca..0c73ddecf 100644 --- a/.github/workflows/mktestdocs.yml +++ b/.github/workflows/mktestdocs.yml @@ -20,9 +20,6 @@ jobs: uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6 with: version: "0.6.16" - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 - with: python-version: "3.10" - name: Install dependencies diff --git a/.github/workflows/notebooks-cloud.yml b/.github/workflows/notebooks-cloud.yml index 02b865af0..d1d842847 100644 --- a/.github/workflows/notebooks-cloud.yml +++ b/.github/workflows/notebooks-cloud.yml @@ -47,14 +47,12 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Install uv + uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6 with: + version: "0.6.16" python-version: "3.10" - - - name: Set up uv - # Install a specific uv version using the installer - run: curl -LsSf https://astral.sh/uv/0.4.5/install.sh | sh + enable-cache: true - name: Install dependencies run: | diff --git a/.github/workflows/notebooks-except-cloud.yml b/.github/workflows/notebooks-except-cloud.yml index 04ab4df29..092d6b015 100644 --- a/.github/workflows/notebooks-except-cloud.yml +++ b/.github/workflows/notebooks-except-cloud.yml @@ -42,19 +42,18 @@ jobs: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Install uv + uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6 with: + version: "0.6.16" python-version: "3.10" - cache: "pip" - cache-dependency-path: | - pyproject.toml + enable-cache: true - name: Install dependencies run: | - pip install --no-cache-dir -e .[dev,notebooks] + uv sync --extra dev --extra notebooks - name: Run notebooks tests run: | echo 
"Running ${{ matrix.notebook }}" - papermill --log-output ${{ matrix.notebook }} ${{ matrix.notebook }}-out.ipynb + uv run papermill --log-output ${{ matrix.notebook }} ${{ matrix.notebook }}-out.ipynb diff --git a/.github/workflows/pyvespa-unit-tests.yml b/.github/workflows/pyvespa-unit-tests.yml index 88498b276..c6b177763 100644 --- a/.github/workflows/pyvespa-unit-tests.yml +++ b/.github/workflows/pyvespa-unit-tests.yml @@ -19,19 +19,17 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + - name: Install uv + uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6 with: - python-version: ${{ matrix.python-version }} - - - name: Upgrade pip - run: | - python -m pip install --upgrade pip + version: "0.6.16" + python-version: "${{ matrix.python-version }}" + enable-cache: true - name: Install dependencies run: | - pip install -e .[unittest] + uv sync --extra unittest - name: Run tests run: | - pytest tests/unit -s -v + uv run pytest tests/unit -s -v diff --git a/copilot-instructions.md b/copilot-instructions.md new file mode 100644 index 000000000..5a8e3b800 --- /dev/null +++ b/copilot-instructions.md @@ -0,0 +1,126 @@ +# Project Overview + +This project is **pyvespa**, a Python API for [Vespa.ai](https://vespa.ai/) - the scalable open-source search engine. pyvespa enables users to create, modify, deploy, and interact with Vespa applications directly from Python. The main goal is to allow for faster prototyping and provide an accessible way to leverage Vespa's advanced search capabilities including vector search, hybrid retrieval, ranking, and real-time serving. 
+ +## Repository Structure + +- `/vespa/`: Core Python package containing the main API modules + - `/application.py`: Main Vespa application interface for data-plane operations, such as queries, feeding, and visiting + - `/package.py`: Application package definitions, schemas, fields, and configurations + - `/deployment.py`: Deployment and control-plane interfaces for Vespa Cloud and Docker + - `/evaluation.py`: Tools for evaluating search quality and performance + - `/io.py`: Response handling and data structures + - `/querybuilder/`: Python DSL for building YQL queries + - `/configuration/`: XML configuration generation and services setup +- `/tests/`: Comprehensive test suite with unit and integration tests + - `/unit/`: Unit tests for individual components + - `/integration/`: Integration tests with actual Vespa deployments +- `/docs/`: Documentation and example notebooks + - `/sphinx/source/`: Sphinx documentation source and Jupyter notebooks + - `/examples/`: Practical examples and tutorials (in the format of Jupyter notebooks) +- `/vespacli/`: Python package wrapper of Vespa CLI (This should generally not be modified) +- Root-level scripts: `feed_to_vespa.py`, `feed-split.py` for splitting and feeding data to the Vespa Docsearch application. 
+ +## Core Dependencies and Libraries + +**Primary Dependencies:** +- `requests` and `httpx` - HTTP client libraries for API communication +- `docker` - Docker container management for local deployments +- `jinja2` - Template engine for configuration file generation +- `cryptography` - Certificate and key management for Vespa Cloud +- `lxml` - XML processing and validation +- `fastcore` - Utility functions and decorators +- `tenacity` - Retry logic for robust networking + +**Development Dependencies:** +- `pytest` and `unittest` - Testing frameworks +- `datasets` - For loading and processing data in examples +- Various ML libraries in examples: `transformers`, `torch`, `lightgbm`, `pandas` + +## Coding Standards and Conventions + +**General Guidelines:** +- Strive for simplicity and clarity in code +- Avoid adding external dependencies unless absolutely necessary and approved by user. + +**Python Standards:** +- Use Python 3.9+ features and type hints throughout +- Follow PEP 8 style guidelines with meaningful variable names +- Use lowercase with underscores for function and variable names (`snake_case`) +- Use PascalCase for class names +- Include comprehensive docstrings for all public methods and classes + +**API Design Patterns:** +- Use context managers (`with app.syncio()`, `async with app.asyncio()`) for connection pooling +- Provide both synchronous and asynchronous interfaces +- Use dataclasses and TypedDict for structured configuration +- Follow builder pattern for complex objects (ApplicationPackage, Schema, etc.) 
+- Use callback functions for handling feed operations and responses + +**Error Handling:** +- Raise `VespaError` for Vespa-specific exceptions +- Use tenacity decorators for automatic retry logic +- Validate inputs early and provide clear error messages +- Check response status codes and provide meaningful feedback + +**Testing Practices:** +- Write both unit tests (using unittest/pytest) and integration tests +- Use mock objects for external dependencies +- Test both successful operations and error conditions +- Include doctests in example code within notebooks + +**Configuration and Templates:** +- Use Jinja2 templates for generating XML configurations +- Validate XML against RelaxNG schemas where available +- Support both programmatic and declarative configuration styles +- Maintain backward compatibility when possible + +## Key Architecture Patterns + +**Application Package Creation:** +```python +from vespa.package import ApplicationPackage, Schema, Document, Field, RankProfile + +app_package = ApplicationPackage(name="myapp") +app_package.schema.add_fields( + Field(name="title", type="string", indexing=["index", "summary"]) +) +``` + +**Deployment Patterns:** +- Local Docker deployment via `VespaDocker` +- Vespa Cloud deployment via `VespaCloud` with certificate authentication +- Support for both development and production configurations + +**Query and Feed Operations:** +- Use connection pooling and compression for performance +- Support batch operations and iterables for large datasets +- Provide progress callbacks for long-running operations +- Handle rate limiting and retries automatically + +**Example Code Structure:** +- Start with package installation and imports +- Create application package with schema definition +- Deploy to target environment (Docker or Cloud) +- Feed data using iterables or individual documents +- Execute queries with various ranking strategies +- Include cleanup and teardown procedures + +## Documentation source + +Very often 
you need to consult the Vespa documentation. For example, you may need the format of the XML configuration files or the documentation for a specific query parameter. Find relevant info using the context7 MCP server that you have available. (repo: vespa-engine/documentation) + + +## Development Workflow + +We use `uv` for managing dependencies. +To install the project with development dependencies, run: +```bash +uv sync --extra dev +``` + +After that you can run tests with: +```bash +uv run pytest tests/unit/ -v +``` +You can also replace `unit` with `integration` to run integration tests, or just `tests/` to run all tests, or run specific test files or test cases the same way. \ No newline at end of file