From 58b54e62a565743945a4dd4942ecff58838661fd Mon Sep 17 00:00:00 2001 From: r266-tech Date: Fri, 12 Jun 2026 04:48:54 +0800 Subject: [PATCH 1/2] Preserve rerank scores around empty abstracts --- openviking/retrieve/hierarchical_retriever.py | 39 ++++++++++++------- .../test_hierarchical_retriever_rerank.py | 29 ++++++++++++-- 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/openviking/retrieve/hierarchical_retriever.py b/openviking/retrieve/hierarchical_retriever.py index 62782bf36..c524edae3 100644 --- a/openviking/retrieve/hierarchical_retriever.py +++ b/openviking/retrieve/hierarchical_retriever.py @@ -15,6 +15,15 @@ from datetime import datetime from typing import Any, Dict, List, Optional, Tuple +from openviking_cli.retrieve.types import ( + ContextType, + MatchedContext, + QueryResult, + TypedQuery, +) +from openviking_cli.utils.config import RerankConfig, RetrievalConfig +from openviking_cli.utils.logger import get_logger + from openviking.core.retrieval_targets import default_target_directories from openviking.models.embedder.base import EmbedResult, embed_compat from openviking.models.rerank import RerankClient @@ -24,14 +33,6 @@ from openviking.storage import VikingDBManager, VikingDBManagerProxy from openviking.telemetry import get_current_telemetry from openviking.utils.time_utils import parse_iso_datetime -from openviking_cli.retrieve.types import ( - ContextType, - MatchedContext, - QueryResult, - TypedQuery, -) -from openviking_cli.utils.config import RerankConfig, RetrievalConfig -from openviking_cli.utils.logger import get_logger logger = get_logger(__name__) @@ -273,26 +274,34 @@ def _rerank_scores( if not self._rerank_client or not documents: return fallback_scores + rerank_documents = [ + (index, document) + for index, document in enumerate(documents) + if document.strip() + ] + if not rerank_documents: + return fallback_scores + try: - scores = self._rerank_client.rerank_batch(query, documents) + scores = self._rerank_client.rerank_batch( + query, [document for _, document in rerank_documents] + ) except Exception as e: logger.warning( "[HierarchicalRetriever] Rerank failed, fallback to vector scores: %s", e ) return fallback_scores - if not scores or len(scores) != len(documents): + if not scores or len(scores) != len(rerank_documents): logger.warning( "[HierarchicalRetriever] Invalid rerank result, fallback to vector scores" ) return fallback_scores - normalized_scores: List[float] = [] - for score, fallback in zip(scores, fallback_scores, strict=True): + normalized_scores = list(fallback_scores) + for score, (index, _) in zip(scores, rerank_documents, strict=True): if isinstance(score, (int, float)): - normalized_scores.append(float(score)) - else: - normalized_scores.append(fallback) + normalized_scores[index] = float(score) return normalized_scores def _merge_starting_points( diff --git a/tests/retrieve/test_hierarchical_retriever_rerank.py b/tests/retrieve/test_hierarchical_retriever_rerank.py index 1c5ab89de..a766f12ab 100644 --- a/tests/retrieve/test_hierarchical_retriever_rerank.py +++ b/tests/retrieve/test_hierarchical_retriever_rerank.py @@ -4,13 +4,13 @@ """Hierarchical retriever rerank behavior tests.""" import pytest - -from openviking.retrieve.hierarchical_retriever import HierarchicalRetriever, RetrieverMode -from openviking.server.identity import RequestContext, Role from openviking_cli.retrieve.types import ContextType, TypedQuery from openviking_cli.session.user_id import UserIdentifier from openviking_cli.utils.config import RerankConfig, RetrievalConfig +from openviking.retrieve.hierarchical_retriever import HierarchicalRetriever, RetrieverMode +from openviking.server.identity import RequestContext, Role + class DummyEmbedResult: def __init__(self) -> None: @@ -300,6 +300,29 @@ def test_merge_starting_points_prefers_rerank_scores_in_thinking_mode(monkeypatc assert fake_client.calls == [("hello", ["root A", "root B"])] +def test_rerank_scores_preserves_fallbacks_for_empty_documents(monkeypatch): + fake_client = FakeRerankClient([0.95, 0.05]) + monkeypatch.setattr( + "openviking.retrieve.hierarchical_retriever.RerankClient.from_config", + lambda config: fake_client, + ) + + retriever = HierarchicalRetriever( + storage=DummyStorage(), + embedder=DummyEmbedder(), + rerank_config=_config(), + ) + + scores = retriever._rerank_scores( + "hello", + ["root A", "", " ", "root D"], + [0.2, 0.8, 0.7, 0.4], + ) + + assert scores == [0.95, 0.8, 0.7, 0.05] + assert fake_client.calls == [("hello", ["root A", "root D"])] + + @pytest.mark.asyncio async def test_retrieve_uses_rerank_scores_in_thinking_mode(monkeypatch): fake_client = FakeRerankClient([0.95, 0.05, 0.11, 0.95]) From b3d914d280beec7bfed7f232de810476312fb07f Mon Sep 17 00:00:00 2001 From: r266-tech Date: Fri, 12 Jun 2026 08:34:34 +0800 Subject: [PATCH 2/2] style: format rerank empty-abstract fix --- openviking/retrieve/hierarchical_retriever.py | 21 ++++++++----------- .../test_hierarchical_retriever_rerank.py | 6 +++--- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/openviking/retrieve/hierarchical_retriever.py b/openviking/retrieve/hierarchical_retriever.py index c524edae3..1306870c0 100644 --- a/openviking/retrieve/hierarchical_retriever.py +++ b/openviking/retrieve/hierarchical_retriever.py @@ -15,15 +15,6 @@ from datetime import datetime from typing import Any, Dict, List, Optional, Tuple -from openviking_cli.retrieve.types import ( - ContextType, - MatchedContext, - QueryResult, - TypedQuery, -) -from openviking_cli.utils.config import RerankConfig, RetrievalConfig -from openviking_cli.utils.logger import get_logger - from openviking.core.retrieval_targets import default_target_directories from openviking.models.embedder.base import EmbedResult, embed_compat from openviking.models.rerank import RerankClient @@ -33,6 +24,14 @@ from openviking.storage import VikingDBManager, VikingDBManagerProxy from openviking.telemetry import get_current_telemetry from openviking.utils.time_utils import parse_iso_datetime +from openviking_cli.retrieve.types import ( + ContextType, + MatchedContext, + QueryResult, + TypedQuery, +) +from openviking_cli.utils.config import RerankConfig, RetrievalConfig +from openviking_cli.utils.logger import get_logger logger = get_logger(__name__) @@ -275,9 +274,7 @@ def _rerank_scores( return fallback_scores rerank_documents = [ - (index, document) - for index, document in enumerate(documents) - if document.strip() + (index, document) for index, document in enumerate(documents) if document.strip() ] if not rerank_documents: return fallback_scores diff --git a/tests/retrieve/test_hierarchical_retriever_rerank.py b/tests/retrieve/test_hierarchical_retriever_rerank.py index a766f12ab..2b1e4c5d8 100644 --- a/tests/retrieve/test_hierarchical_retriever_rerank.py +++ b/tests/retrieve/test_hierarchical_retriever_rerank.py @@ -4,12 +4,12 @@ """Hierarchical retriever rerank behavior tests.""" import pytest -from openviking_cli.retrieve.types import ContextType, TypedQuery -from openviking_cli.session.user_id import UserIdentifier -from openviking_cli.utils.config import RerankConfig, RetrievalConfig from openviking.retrieve.hierarchical_retriever import HierarchicalRetriever, RetrieverMode from openviking.server.identity import RequestContext, Role +from openviking_cli.retrieve.types import ContextType, TypedQuery +from openviking_cli.session.user_id import UserIdentifier +from openviking_cli.utils.config import RerankConfig, RetrievalConfig class DummyEmbedResult: