From 9fe123ae60b2e7a722a50023dcd0a821d5b1f15e Mon Sep 17 00:00:00 2001 From: Stijn Goossens <22433228+StijnGoossens@users.noreply.github.com> Date: Tue, 12 May 2026 08:14:06 +0000 Subject: [PATCH 1/5] fix: narrow supported python versions --- README.md | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 43aacae7..30f21749 100644 --- a/README.md +++ b/README.md @@ -490,7 +490,7 @@ The following development environments are supported: - This project follows the [Conventional Commits](https://www.conventionalcommits.org/) standard to automate [Semantic Versioning](https://semver.org/) and [Keep A Changelog](https://keepachangelog.com/) with [Commitizen](https://github.com/commitizen-tools/commitizen). - Run `poe` from within the development environment to print a list of [Poe the Poet](https://github.com/nat-n/poethepoet) tasks available to run on this project. -- Run `uv add {package}` from within the development environment to install a run time dependency and add it to `pyproject.toml` and `uv.lock`. Add `--dev` to install a development dependency. +- Run `uv add {package}` from within the development environment to install a run time dependency and add it to `pyproject.toml`. Add `--dev` to install a development dependency. - Run `uv sync --upgrade` from within the development environment to upgrade all dependencies to the latest versions allowed by `pyproject.toml`. Add `--only-dev` to upgrade the development dependencies only. - Run `cz bump` to bump the package's version, update the `CHANGELOG.md`, and create a git tag. Then push the changes and the git tag with `git push origin main --tags`. 
diff --git a/pyproject.toml b/pyproject.toml index fa47e707..558baf33 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ readme = "README.md" authors = [ { name = "Laurent Sorber", email = "laurent@superlinear.eu" }, ] -requires-python = ">=3.10,<4.0" +requires-python = ">=3.10,<3.14" dependencies = [ # Configuration: "platformdirs (>=4.0.0)", From b3029a432d2258791a98ea3807316a54533f84aa Mon Sep 17 00:00:00 2001 From: Stijn Goossens <22433228+StijnGoossens@users.noreply.github.com> Date: Tue, 12 May 2026 08:55:21 +0000 Subject: [PATCH 2/5] fix: remove unavailable mistral extra --- README.md | 6 +++--- pyproject.toml | 1 - src/raglite/_mistral_ocr.py | 10 ++-------- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 30f21749..5841a5c3 100644 --- a/README.md +++ b/README.md @@ -70,10 +70,10 @@ To add support for filetypes other than PDF, use the `pandoc` extra: pip install raglite[pandoc] ``` -To add support for high-quality document processing with [Mistral OCR](https://docs.mistral.ai/capabilities/document/), use the `mistral-ocr` extra: +To add support for high-quality document processing with [Mistral OCR](https://docs.mistral.ai/capabilities/document/), install `mistralai`: ```sh -pip install raglite[mistral-ocr] +pip install mistralai ``` To add support for evaluation, use the `ragas` extra: @@ -160,7 +160,7 @@ my_config = RAGLiteConfig( > ✍️ To insert documents other than PDF, install the `pandoc` extra with `pip install raglite[pandoc]`. 
> [!TIP] -> 🔎 For higher-quality document processing with automatic image descriptions, install the `mistral-ocr` extra with `pip install raglite[mistral-ocr]` and configure it as follows: +> 🔎 For higher-quality document processing with automatic image descriptions, install `mistralai` and configure it as follows: > ```python > from raglite import RAGLiteConfig, MistralOCRConfig > diff --git a/pyproject.toml b/pyproject.toml index 558baf33..b5b9fb92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,6 @@ chainlit = ["chainlit (>=2.0.0)"] # Large Language Models: llama-cpp-python = ["llama-cpp-python (>=0.3.9)"] # Markdown conversion: -mistral-ocr = ["mistralai (>=1.10.1)"] pandoc = ["pypandoc-binary (>=1.13)"] # Evaluation: ragas = ["pandas (>=2.1.1)", "ragas (>=0.3.3)"] diff --git a/src/raglite/_mistral_ocr.py b/src/raglite/_mistral_ocr.py index 84fccc78..43daad04 100644 --- a/src/raglite/_mistral_ocr.py +++ b/src/raglite/_mistral_ocr.py @@ -73,10 +73,7 @@ def _get_mistral_client(processor_config: MistralOCRConfig) -> Any: try: from mistralai import Mistral except ImportError as e: - error_msg = ( - "To use MistralOCR, please install the `mistral-ocr` extra: " - "`pip install raglite[mistral-ocr]` or `uv add raglite[mistral-ocr]`." - ) + error_msg = "To use MistralOCR, please install `mistralai`." raise ImportError(error_msg) from e api_key = _get_api_key(processor_config) @@ -88,10 +85,7 @@ def _get_response_format_converter() -> Any: try: from mistralai.extra import response_format_from_pydantic_model except ImportError as e: - error_msg = ( - "To use MistralOCR, please install the `mistral-ocr` extra: " - "`uv add raglite[mistral-ocr]` or `pip install raglite[mistral-ocr]`." - ) + error_msg = "To use MistralOCR, please install `mistralai`." 
raise ImportError(error_msg) from e return response_format_from_pydantic_model From d0d55d610e07dab9598f4ddeddf575307f8e471c Mon Sep 17 00:00:00 2001 From: Stijn Goossens <22433228+StijnGoossens@users.noreply.github.com> Date: Tue, 12 May 2026 10:06:45 +0000 Subject: [PATCH 3/5] fix: exclude crashing mypy release The highest-resolution CI lint jobs selected mypy 2.1.0 and failed during the pre-commit mypy hook with an internal error, even though nearby chunks reported "Success: no issues found". Exclude that specific mypy release so highest-resolution jobs resolve to a non-crashing version while preserving future mypy updates. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b5b9fb92..3a818915 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ dev = [ "ipykernel (>=6.29.4)", "ipython (>=8.18.0)", "ipywidgets (>=8.1.2)", - "mypy (>=1.18.2)", + "mypy (>=1.18.2,!=2.1.0)", "pdoc (>=15.0.1)", "poethepoet (>=0.32.1)", "pre-commit (>=4.0.1)", From b784e6b3ec158fa6ad2908c1609068c984c688be Mon Sep 17 00:00:00 2001 From: Stijn Goossens <22433228+StijnGoossens@users.noreply.github.com> Date: Tue, 12 May 2026 10:26:32 +0000 Subject: [PATCH 4/5] fix: run mypy pre-commit as one check The CI lint failure was not limited to one mypy release: highest-resolution jobs crashed with mypy 2.1.0 and then 2.0.0, and local pre-commit also reproduced an internal error with mypy 1.20.1. In each failure, pre-commit had split the hook into several 4-file mypy invocations. Run mypy once over src and tests, and disable filename passing for the hook. This matches the direct project-level mypy invocation, which passes across the tested mypy versions, and avoids chunked partial graph analysis/cache/report interactions.
--- .pre-commit-config.yaml | 3 ++- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c429a4b2..f56221df 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -68,6 +68,7 @@ repos: types_or: [python, pyi] - id: mypy name: mypy - entry: mypy + entry: mypy src tests language: system + pass_filenames: false types: [python] diff --git a/pyproject.toml b/pyproject.toml index 3a818915..b5b9fb92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ dev = [ "ipykernel (>=6.29.4)", "ipython (>=8.18.0)", "ipywidgets (>=8.1.2)", - "mypy (>=1.18.2,!=2.1.0)", + "mypy (>=1.18.2)", "pdoc (>=15.0.1)", "poethepoet (>=0.32.1)", "pre-commit (>=4.0.1)", From e4f2eb215ce72c93851ed3bb35629c85f8e509b8 Mon Sep 17 00:00:00 2001 From: Stijn Goossens <22433228+StijnGoossens@users.noreply.github.com> Date: Tue, 12 May 2026 11:27:44 +0000 Subject: [PATCH 5/5] fix: address Mistral OCR review comments Keep the mistral-ocr extra as an empty compatibility placeholder so existing raglite[mistral-ocr] install commands keep resolving without pulling the quarantined mistralai package through --all-extras. Also clarify lazy-import errors so users know an incompatible installed mistralai version may need upgrading, not just installation. 
--- pyproject.toml | 1 + src/raglite/_mistral_ocr.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b5b9fb92..134e84d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,6 +82,7 @@ chainlit = ["chainlit (>=2.0.0)"] llama-cpp-python = ["llama-cpp-python (>=0.3.9)"] # Markdown conversion: pandoc = ["pypandoc-binary (>=1.13)"] +mistral-ocr = [] # Evaluation: ragas = ["pandas (>=2.1.1)", "ragas (>=0.3.3)"] # Benchmarking: diff --git a/src/raglite/_mistral_ocr.py b/src/raglite/_mistral_ocr.py index 43daad04..be0f8f19 100644 --- a/src/raglite/_mistral_ocr.py +++ b/src/raglite/_mistral_ocr.py @@ -73,7 +73,7 @@ def _get_mistral_client(processor_config: MistralOCRConfig) -> Any: try: from mistralai import Mistral except ImportError as e: - error_msg = "To use MistralOCR, please install `mistralai`." + error_msg = "To use MistralOCR, please install or upgrade `mistralai`." raise ImportError(error_msg) from e api_key = _get_api_key(processor_config) @@ -85,7 +85,7 @@ def _get_response_format_converter() -> Any: try: from mistralai.extra import response_format_from_pydantic_model except ImportError as e: - error_msg = "To use MistralOCR, please install `mistralai`." + error_msg = "To use MistralOCR, please install or upgrade `mistralai`." raise ImportError(error_msg) from e return response_format_from_pydantic_model @@ -177,7 +177,7 @@ def mistral_ocr_to_markdown(doc_path: Path, *, processor_config: MistralOCRConfi Raises ------ ImportError - If the mistralai package is not installed. + If the mistralai package is not installed or is incompatible. ValueError If MISTRAL_API_KEY is not set and MistralOCRConfig.api_key is None. MistralOCRError