From 775a2cf3af3b9eca4ec82fd076a5ce3a7edf7ef2 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Wed, 10 Jun 2026 18:13:41 +0800 Subject: [PATCH 01/19] =?UTF-8?q?memory-resource=E8=AE=B0=E5=BF=86?= =?UTF-8?q?=E9=93=BE=E6=8E=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/zh/api/02-resources.md | 9 +- docs/zh/api/03-filesystem.md | 2 + .../processing/resource_linking.yaml | 59 ++ .../processing/resource_unlinking.yaml | 53 ++ openviking/server/routers/filesystem.py | 2 + openviking/service/core.py | 8 + openviking/service/fs_service.py | 21 +- .../service/resource_memory_link_service.py | 781 ++++++++++++++++++ openviking/service/resource_service.py | 38 + openviking/session/memory/memory_updater.py | 14 +- .../session/memory/utils/link_renderer.py | 9 +- openviking/storage/content_write.py | 204 ++++- tests/server/test_content_write_service.py | 61 ++ tests/server/test_filesystem_router.py | 35 + .../test_resource_memory_link_service.py | 358 ++++++++ tests/session/memory/test_memory_updater.py | 69 ++ tests/test_link_renderer.py | 5 + 17 files changed, 1713 insertions(+), 15 deletions(-) create mode 100644 openviking/prompts/templates/processing/resource_linking.yaml create mode 100644 openviking/prompts/templates/processing/resource_unlinking.yaml create mode 100644 openviking/service/resource_memory_link_service.py create mode 100644 tests/server/test_filesystem_router.py create mode 100644 tests/service/test_resource_memory_link_service.py diff --git a/docs/zh/api/02-resources.md b/docs/zh/api/02-resources.md index 87ded1b511..54f7e4faf8 100644 --- a/docs/zh/api/02-resources.md +++ b/docs/zh/api/02-resources.md @@ -121,7 +121,8 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector 3. 调用对应 Parser 解析内容 4. 构建目录树并写入 AGFS 5. `wait=true` 时等待语义处理完成;`wait=false` 时返回 `task_id` 用于队列跟踪 -6. 如指定 `--watch-interval`,设置定时更新任务 +6. 如果 `reason` 非空,基于 `reason` 和资源 URI 触发一次独立的 memory linking,生成或更新合适的用户记忆 +7. 
如指定 `--watch-interval`,设置定时更新任务 **代码入口**: - `openviking/client/local.py:LocalClient.add_resource` - SDK 入口(嵌入式) @@ -141,7 +142,7 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector | to | string | 否 | - | 目标 Viking URI(精确位置)。与 `parent` 互斥 | | parent | string | 否 | - | 父级 Viking URI(资源放入此目录下)。与 `to` 互斥 | | create_parent | bool | 否 | False | 如果父目录不存在,自动创建父目录(服务端标志) | -| reason | string | 否 | "" | 添加资源的原因(用于文档化和相关性提升,实验特性) | +| reason | string | 否 | "" | 添加资源的原因。非空时会基于该原因和资源 URI 生成或更新用户记忆,并在记忆中记录对资源的引用 | | instruction | string | 否 | "" | 语义提取的处理指令(实验特性) | | wait | bool | 否 | False | 是否等待语义处理和向量化完成才返回 | | timeout | float | 否 | None | 超时时间(秒),仅 `wait=true` 时生效 | @@ -158,6 +159,9 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector - `to` 和 `parent` 不能同时使用;如果使用 `parent` 且希望父目录不存在时自动创建,请传 `create_parent=true`。指定 `to` 且目标已存在时,触发增量更新。 - `path` 和 `temp_file_id` 不能同时指定,上传本地文件需要先通过 [temp_upload](#temp_upload) 上传获取 `temp_file_id`,在 SDK 和 CLI 中已经封装好。 - 只有 Git 仓库来源在 `wait=false` 时使用完整后台导入;OpenViking 会先完成仓库 preflight 和目标规划,再返回 `task_id`。 +- `reason` 触发的记忆生成不会读取或展开资源正文,只使用 `reason`、`viking://resources/...` URI 和可用的资源名称。系统会选择合适的既有用户记忆类型(如 `profile`、`entities`、`events`、`preferences`),不会强制写入固定记忆类型。 +- 资源文件本身不会写入额外注释或 metadata 文件。资源与记忆的关联只保存在记忆文件的 `MEMORY_FIELDS.resource_refs` / `links` 中。 +- 删除 `viking://resources/...` 时,系统会在删除前扫描当前用户记忆中的 `resource_refs`,清理对应资源 URI 和由该 `reason` 引入的内容,并重新刷新相关记忆的语义索引。 - 其他来源在 `wait=false` 时会在响应前完成来源解析、目标解析和 AGFS 写入,仅 semantic 与 embedding 队列继续异步处理。 - `watch_interval > 0` 时,如果指定了 `to`,监控任务绑定该目标;如果未指定 `to`,监控任务绑定本次导入返回的 `root_uri`。如果无法得到稳定 `root_uri`,请求会报错并要求显式传 `to`。 - 本地目录输入会遵循 `.gitignore`(根目录和子目录,标准 Git 语义);`ignore_dirs`、`include`、`exclude` 会在此基础上进一步过滤。 @@ -350,6 +354,7 @@ task_id uuid-xxx | `errors` | array | 处理过程中的错误列表 | | `warnings` | array | (可选)处理过程中的警告列表(仅在 `strict=False` 时可能出现) | | `queue_status` | object | (可选,仅当 `wait=true` 时)队列处理状态,包含 `pending`、`processing`、`completed` 计数 | +| `memory_linking` | object | (可选,仅当 `reason` 触发记忆生成时)本次资源 URI 
与用户记忆的关联结果 | 对于 `wait=false` 的 Git 仓库来源,后台任务的 `task_type="add_resource"`,`resource_id` 等于返回的 `root_uri`。运行中的任务记录可能包含 `stage`;完成后的任务 `result` 会包含带有 semantic 和 embedding 汇总的 `queue_status`。 diff --git a/docs/zh/api/03-filesystem.md b/docs/zh/api/03-filesystem.md index 39b9e5304e..1ec2724477 100644 --- a/docs/zh/api/03-filesystem.md +++ b/docs/zh/api/03-filesystem.md @@ -635,6 +635,8 @@ openviking rm viking://resources/old.md [--recursive] `estimated_deleted_count` 字段(递归删除时)包含删除的项目(文件和目录)估计数量(来自向量索引)。CLI 会在输出中显示此信息。 +删除 `viking://resources/...` 时,响应可能包含 `memory_cleanup`,表示删除前已清理引用该资源 URI 的用户记忆。关联只来自记忆文件的 `MEMORY_FIELDS.resource_refs` / `links`,资源文件本身不会保存 reason 注释或 sidecar metadata 文件。 + --- ### mv() diff --git a/openviking/prompts/templates/processing/resource_linking.yaml b/openviking/prompts/templates/processing/resource_linking.yaml new file mode 100644 index 0000000000..c89da7a617 --- /dev/null +++ b/openviking/prompts/templates/processing/resource_linking.yaml @@ -0,0 +1,59 @@ +metadata: + id: "processing.resource_linking" + name: "Resource Linking" + description: "Create or update user memories from an add-resource reason" + version: "1.0.0" + language: "en" + category: "processing" + +variables: + - name: "output_language" + type: "string" + description: "Target language for memory content" + required: true + + - name: "resource_uri" + type: "string" + description: "Viking resource URI" + required: true + + - name: "reason" + type: "string" + description: "User-provided add-resource reason" + required: true + + - name: "source_name" + type: "string" + description: "Original resource display name" + required: false + default: "" + +template: | + You are a memory extraction agent for a resource-addition event. + + ## Objective + Create or update user memories using ONLY the user-provided reason and the resource URI. + + ## Target Output Language + All memory content MUST be written in {{ output_language }}. 
+ + ## Resource Addition + Resource URI: {{ resource_uri }} + Source name: {{ source_name or "N/A" }} + Reason: {{ reason }} + + ## Rules + - Do NOT read, summarize, OCR, infer, or expand the resource file content. + - Treat the reason as the only semantic evidence. + - Choose the most appropriate existing user memory type from the output schema, such as profile, entities, events, or preferences. + - If the reason is not worth remembering, output no memory changes. + - Create/edit visible memory as durable natural sentences preserving user intent/judgment; rewrite terse resource labels. + - Example: reason "page 3 total should be 42" -> "User said page 3 total should be 42", not "stored report resource". + - When editing existing memory, merge with it; never replace it with only the newest resource or enumerate/count resources. + - Use the Resource URI only as resource identity metadata. + - Do NOT include raw resource URIs, file paths, or generated links in visible memory content. + - Do NOT claim that you inspected the resource itself. + - Return only memory operations that are grounded in the reason. 
+ +llm_config: + temperature: 0.0 diff --git a/openviking/prompts/templates/processing/resource_unlinking.yaml b/openviking/prompts/templates/processing/resource_unlinking.yaml new file mode 100644 index 0000000000..c870ed87b7 --- /dev/null +++ b/openviking/prompts/templates/processing/resource_unlinking.yaml @@ -0,0 +1,53 @@ +metadata: + id: "processing.resource_unlinking" + name: "Resource Unlinking" + description: "Remove resource-derived content from user memories before resource deletion" + version: "1.0.0" + language: "en" + category: "processing" + +variables: + - name: "output_language" + type: "string" + description: "Target language for memory content" + required: true + + - name: "memory_uri" + type: "string" + description: "Memory URI to clean" + required: true + + - name: "resource_uri" + type: "string" + description: "Deleted resource URI" + required: true + + - name: "reason" + type: "string" + description: "Original add-resource reason" + required: true + +template: | + You are a memory cleanup agent for a deleted resource. + + ## Objective + Remove only the memory content that was introduced because of the deleted resource. + + ## Target Output Language + All remaining memory content MUST be written in {{ output_language }}. + + ## Deleted Resource + Resource URI: {{ resource_uri }} + Original add-resource reason: {{ reason }} + Memory URI to clean: {{ memory_uri }} + + ## Rules + - Use the preloaded memory content from the read result. + - Remove the exact Resource URI and any content that exists only because of that resource/reason. + - Preserve unrelated user memories. + - If the whole memory is only about the deleted resource, delete the memory. + - Do NOT mention that the resource was deleted in the cleaned memory unless that fact is independently worth remembering. + - Return only the necessary edit/delete memory operations. 
+ +llm_config: + temperature: 0.0 diff --git a/openviking/server/routers/filesystem.py b/openviking/server/routers/filesystem.py index 474dd218fd..d9f3692986 100644 --- a/openviking/server/routers/filesystem.py +++ b/openviking/server/routers/filesystem.py @@ -173,6 +173,8 @@ async def rm( response_result = {"uri": uri} if isinstance(result, dict) and "estimated_deleted_count" in result: response_result["estimated_deleted_count"] = result["estimated_deleted_count"] + if isinstance(result, dict) and "memory_cleanup" in result: + response_result["memory_cleanup"] = result["memory_cleanup"] return Response(status="ok", result=response_result) diff --git a/openviking/service/core.py b/openviking/service/core.py index 80697ea60c..709d6bab85 100644 --- a/openviking/service/core.py +++ b/openviking/service/core.py @@ -20,6 +20,7 @@ from openviking.service.fs_service import FSService from openviking.service.pack_service import PackService from openviking.service.relation_service import RelationService +from openviking.service.resource_memory_link_service import ResourceMemoryLinkService from openviking.service.resource_service import ResourceService from openviking.service.search_service import SearchService from openviking.service.session_service import SessionService @@ -119,6 +120,7 @@ def __init__( self._relation_service = RelationService() self._pack_service = PackService() self._search_service = SearchService() + self._resource_memory_link_service = ResourceMemoryLinkService() self._resource_service = ResourceService() self._session_service = SessionService() self._debug_service = DebugService() @@ -415,6 +417,11 @@ async def initialize(self) -> None: self._fs_service.set_dependencies( viking_fs=self._viking_fs, privacy_config_service=self._privacy_config_service, + resource_memory_link_service=self._resource_memory_link_service, + ) + self._resource_memory_link_service.set_dependencies( + vikingdb=self._vikingdb_manager, + viking_fs=self._viking_fs, ) 
self._relation_service.set_viking_fs(self._viking_fs) self._pack_service.set_dependencies( @@ -428,6 +435,7 @@ async def initialize(self) -> None: resource_processor=self._resource_processor, skill_processor=self._skill_processor, watch_scheduler=self._watch_scheduler, + resource_memory_link_service=self._resource_memory_link_service, ) self._session_service.set_dependencies( vikingdb=self._vikingdb_manager, diff --git a/openviking/service/fs_service.py b/openviking/service/fs_service.py index c0cb47446c..d1ad3af60c 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -6,7 +6,7 @@ Provides file system operations: ls, mkdir, rm, mv, tree, stat, read, abstract, overview, grep, glob. """ -from typing import Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional from openviking.core.namespace import context_type_for_uri from openviking.core.uri_validation import validate_optional_viking_uri, validate_viking_uri @@ -24,6 +24,9 @@ logger = get_logger(__name__) +if TYPE_CHECKING: + from openviking.service.resource_memory_link_service import ResourceMemoryLinkService + class FSService: """File system operations service.""" @@ -32,18 +35,22 @@ def __init__( self, viking_fs: Optional[VikingFS] = None, privacy_config_service: Optional[UserPrivacyConfigService] = None, + resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ): self._viking_fs = viking_fs self._privacy_config_service = privacy_config_service + self._resource_memory_link_service = resource_memory_link_service def set_dependencies( self, viking_fs: VikingFS, privacy_config_service: Optional[UserPrivacyConfigService] = None, + resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ) -> None: """Set service dependencies (for deferred initialization).""" self._viking_fs = viking_fs self._privacy_config_service = privacy_config_service + self._resource_memory_link_service = resource_memory_link_service def 
_ensure_initialized(self) -> VikingFS: """Ensure VikingFS is initialized.""" @@ -163,7 +170,17 @@ async def rm( """Remove resource.""" uri = validate_viking_uri(uri) viking_fs = self._ensure_initialized() - return await viking_fs.rm(uri, recursive=recursive, ctx=ctx) + cleanup_result: Optional[Dict[str, Any]] = None + if self._resource_memory_link_service and context_type_for_uri(uri) == "resource": + cleanup_result = await self._resource_memory_link_service.before_resource_delete( + ctx=ctx, + resource_uri=uri, + recursive=recursive, + ) + result = await viking_fs.rm(uri, recursive=recursive, ctx=ctx) + if cleanup_result is not None and isinstance(result, dict): + result["memory_cleanup"] = cleanup_result + return result async def mv(self, from_uri: str, to_uri: str, ctx: RequestContext) -> None: """Move resource.""" diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py new file mode 100644 index 0000000000..a694033998 --- /dev/null +++ b/openviking/service/resource_memory_link_service.py @@ -0,0 +1,781 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""Link resource addition reasons to user memories. + +This module keeps resource files immutable: all traceability lives in memory +files' MEMORY_FIELDS metadata. 
+""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Dict, Iterable, List, Optional, Sequence + +from openviking.core.namespace import canonical_user_root, context_type_for_uri +from openviking.message import Message +from openviking.message.part import TextPart +from openviking.prompts.manager import render_prompt +from openviking.server.identity import RequestContext +from openviking.session.memory.dataclass import MemoryFile, ResolvedOperations +from openviking.session.memory.extract_loop import ExtractLoop +from openviking.session.memory.memory_isolation_handler import MemoryIsolationHandler +from openviking.session.memory.memory_updater import ( + ExtractContext, + MemoryUpdater, + MemoryUpdateResult, +) +from openviking.session.memory.session_extract_context_provider import SessionExtractContextProvider +from openviking.session.memory.utils.link_renderer import LinkRenderer +from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking.storage import VikingDBManager +from openviking.storage.viking_fs import VikingFS, get_viking_fs +from openviking_cli.exceptions import NotFoundError +from openviking_cli.utils import VikingURI, get_logger +from openviking_cli.utils.config import get_openviking_config + +logger = get_logger(__name__) + +RESOURCE_REF_SOURCE = "add_resource.reason" + + +@dataclass +class _MemoryRefMatch: + memory_uri: str + memory_file: MemoryFile + resource_ref: Dict[str, Any] + + +class _ResourceLinkingProvider(SessionExtractContextProvider): + """Provider for creating/updating memory from an add-resource reason.""" + + def __init__( + self, + *, + resource_uri: str, + reason: str, + source_name: Optional[str], + **kwargs: Any, + ): + self.resource_uri = resource_uri + self.reason = reason + self.source_name = source_name or "" + messages = [ + Message( + id="resource-linking", + role="user", + parts=[ + 
TextPart( + text=( + "Resource URI: " + f"{resource_uri}\nReason: {reason}\nSource name: {self.source_name}" + ) + ) + ], + ) + ] + super().__init__(messages=messages, **kwargs) + + def instruction(self) -> str: + return render_prompt( + "processing.resource_linking", + { + "output_language": self.get_output_language(), + "resource_uri": self.resource_uri, + "reason": self.reason, + "source_name": self.source_name, + }, + ) + + def _build_conversation_message(self) -> Dict[str, Any]: + return { + "role": "user", + "content": ( + "## Resource Addition\n" + f"Resource URI: {self.resource_uri}\n" + f"Reason: {self.reason}\n" + f"Source name: {self.source_name or 'N/A'}\n\n" + "Analyze only this resource addition record and output all memory " + "write/edit/delete operations in a single JSON response." + ), + } + + def _build_prefetch_search_query(self) -> str: + return "\n".join(part for part in [self.reason, self.source_name] if part).strip() + + def get_conversation_text(self) -> str: + return f"{self.reason}\n{self.resource_uri}\n{self.source_name}".strip() + + def _detect_language(self) -> str: + from openviking.session.memory.utils import resolve_output_language + + return resolve_output_language( + "\n".join(part for part in [self.reason, self.source_name] if part).strip() + ) + + +class _ResourceUnlinkingProvider(SessionExtractContextProvider): + """Provider for removing resource-derived content from one memory file.""" + + def __init__( + self, + *, + memory_uri: str, + resource_uri: str, + reason: str, + memory_file: MemoryFile, + **kwargs: Any, + ): + self.memory_uri = memory_uri + self.resource_uri = resource_uri + self.reason = reason + self.memory_file = memory_file + messages = [ + Message( + id="resource-unlinking", + role="user", + parts=[ + TextPart( + text=( + "Deleted resource URI: " + f"{resource_uri}\nOriginal reason: {reason}\n" + f"Memory URI: {memory_uri}" + ) + ) + ], + ) + ] + super().__init__(messages=messages, **kwargs) + + def 
async def prefetch(self) -> List[Dict[str, Any]]:
    """Return the initial message list for a resource-cleanup run.

    The list starts with one user message naming the deleted resource URI,
    the original add-resource reason and the memory URI to clean. The
    inherited ``_append_structured_read_result`` helper is then awaited with
    that list, index ``0`` and the memory URI.
    NOTE(review): the helper is defined outside this file; presumably it
    appends the preloaded memory content to the list -- confirm.

    Returns:
        The message list after the helper has run.
    """
    header = "\n".join(
        [
            "## Resource Deletion Cleanup",
            f"Deleted resource URI: {self.resource_uri}",
            f"Original add-resource reason: {self.reason}",
            f"Memory to clean: {self.memory_uri}",
            "",
            (
                "Use the preloaded memory content below. Output the cleanup operation "
                "as a single JSON response."
            ),
        ]
    )
    conversation: List[Dict[str, Any]] = [{"role": "user", "content": header}]
    await self._append_structured_read_result(conversation, 0, self.memory_uri)
    return conversation
async def before_resource_delete(
    self,
    *,
    ctx: RequestContext,
    resource_uri: str,
    recursive: bool = False,
) -> Dict[str, Any]:
    """Remove references from user memories before deleting a resource.

    Args:
        ctx: Request context used for every file-system call.
        resource_uri: URI of the resource (or resource directory) about to
            be deleted.
        recursive: When true, refs pointing below ``resource_uri`` are
            cleaned as well.

    Returns:
        ``{"status": "skipped", "reason": "not_resource"}`` when the URI is
        not a resource URI, ``{"status": "no_references", "memory_uris": []}``
        when no memory references it, otherwise a ``"success"`` dict with the
        de-duplicated ``memory_uris`` and ``deleted_memory_uris``.

    Raises:
        RuntimeError: If cleaning any memory failed. All memories are
            attempted first; the message lists every failure.
    """
    if context_type_for_uri(resource_uri) != "resource":
        return {"status": "skipped", "reason": "not_resource"}

    matches = await self._find_referencing_memories(
        ctx=ctx,
        resource_uri=resource_uri,
        recursive=recursive,
    )
    if not matches:
        return {"status": "no_references", "memory_uris": []}

    cleaned: List[str] = []
    deleted: List[str] = []
    errors: List[str] = []
    grouped = self._group_matches_by_memory(matches)
    for memory_uri, memory_matches in grouped.items():
        first = memory_matches[0]
        # One memory can hold several matching refs (e.g. a recursive delete
        # of a directory whose files were added with different reasons).
        # Hand every distinct reason to the cleanup run instead of only the
        # first one; with a single ref this is exactly that ref's reason.
        reason = "\n".join(
            dict.fromkeys(
                text
                for text in (
                    str(match.resource_ref.get("reason") or "") for match in memory_matches
                )
                if text
            )
        )
        try:
            cleanup_result = await self._cleanup_memory_reference(
                ctx=ctx,
                memory_uri=memory_uri,
                memory_file=first.memory_file,
                resource_uri=resource_uri,
                reason=reason,
            )
            cleaned.extend(cleanup_result.written_uris + cleanup_result.edited_uris)
            deleted.extend(cleanup_result.deleted_uris)
            if not cleanup_result.has_changes():
                # The cleanup run produced no operation: still drop the ref
                # metadata so the memory no longer points at the resource.
                await self._remove_resource_refs(memory_uri, resource_uri, ctx)
                cleaned.append(memory_uri)
            await self._assert_resource_unlinked(memory_uri, resource_uri, ctx)
        except NotFoundError:
            # The memory file is gone, so nothing is left to unlink.
            deleted.append(memory_uri)
        except Exception as exc:
            errors.append(f"{memory_uri}: {exc}")

    if errors:
        raise RuntimeError(
            "resource memory cleanup failed before deleting resource: " + "; ".join(errors)
        )
    return {
        "status": "success",
        "memory_uris": list(dict.fromkeys(cleaned)),
        "deleted_memory_uris": list(dict.fromkeys(deleted)),
    }
async def _cleanup_memory_reference(
    self,
    *,
    ctx: RequestContext,
    memory_uri: str,
    memory_file: MemoryFile,
    resource_uri: str,
    reason: str,
) -> MemoryUpdateResult:
    """Run the unlinking extraction for one memory file and apply its result.

    Args:
        ctx: Request context forwarded to every helper.
        memory_uri: URI of the memory file to clean.
        memory_file: Parsed memory file as it was before cleanup.
        resource_uri: URI of the resource being deleted.
        reason: Original add-resource reason recorded in the ref.

    Returns:
        An empty ``MemoryUpdateResult`` when the extraction loop yields no
        operations, otherwise the result of applying them.
    """
    # NOTE(review): _infer_memory_type is defined outside the visible part of
    # the file; its result is only used to restrict the allowed memory types.
    inferred_type = self._infer_memory_type(memory_uri, memory_file)
    unlink_provider = _ResourceUnlinkingProvider(
        memory_uri=memory_uri,
        resource_uri=resource_uri,
        reason=reason,
        memory_file=memory_file,
        ctx=ctx,
        viking_fs=self._get_viking_fs(),
    )
    type_filter = {inferred_type} if inferred_type else None
    ops, extract_ctx, isolation = await self._run_extract_loop(
        provider=unlink_provider,
        ctx=ctx,
        allowed_memory_types=type_filter,
    )
    if not ops:
        return MemoryUpdateResult()

    outcome = await self._apply_memory_operations(
        provider=unlink_provider,
        operations=ops,
        ctx=ctx,
        extract_context=extract_ctx,
        isolation_handler=isolation,
    )
    for touched_uri in outcome.written_uris + outcome.edited_uris:
        await self._remove_resource_refs(touched_uri, resource_uri, ctx)
        if touched_uri != memory_uri:
            continue
        # Only the memory that was asked to be cleaned gets its original
        # metadata shape restored.
        await self._restore_cleanup_metadata(touched_uri, memory_file, ctx)
    return outcome
async def _append_resource_refs(
    self,
    *,
    memory_uris: Sequence[str],
    resource_uri: str,
    reason: str,
    ctx: RequestContext,
) -> None:
    """Record ``resource_uri`` in the ``resource_refs`` metadata of each memory.

    Each distinct URI in ``memory_uris`` that is a memory URI is read, its
    content is passed through ``_link_resource_in_content``, a ref entry is
    merged into ``extra_fields["resource_refs"]`` and the file is written
    back. Memories that cannot be read or parsed are logged and skipped.

    Args:
        memory_uris: Candidate memory URIs; duplicates are processed once.
        resource_uri: Resource the memories should reference.
        reason: Add-resource reason stored in the ref entry.
        ctx: Request context for file-system access.
    """
    fs = self._get_viking_fs()
    # One timestamp is shared by every ref written in this call.
    stamp = datetime.now(timezone.utc).isoformat()
    for target_uri in dict.fromkeys(memory_uris):
        if context_type_for_uri(target_uri) != "memory":
            continue
        try:
            raw_text = await fs.read_file(target_uri, ctx=ctx)
            parsed = MemoryFileUtils.read(raw_text, uri=target_uri)
        except Exception as exc:
            logger.warning("Failed to read memory for resource ref append: %s", exc)
            continue

        known_refs = self._coerce_resource_refs(parsed.extra_fields.get("resource_refs"))
        # Sentence fallback is allowed only while every existing ref (if any)
        # already points at this same resource.
        only_this_resource = all(
            self._resource_ref_matches(item.get("resource_uri"), resource_uri, recursive=False)
            for item in known_refs
        )
        anchor = self._pick_match_text(parsed, reason)
        parsed.content, rendered_anchor = self._link_resource_in_content(
            parsed.content,
            resource_uri=resource_uri,
            match_text=anchor,
            allow_sentence_fallback=only_this_resource,
        )
        if rendered_anchor:
            anchor = rendered_anchor

        new_ref = {
            "resource_uri": resource_uri,
            "reason": reason,
            "source": RESOURCE_REF_SOURCE,
            "created_at": stamp,
        }
        if anchor:
            new_ref["match_text"] = anchor
        parsed.extra_fields["resource_refs"] = self._merge_resource_refs(known_refs, new_ref)
        await fs.write_file(target_uri, MemoryFileUtils.write(parsed), ctx=ctx)
async def _find_referencing_memories(
    self,
    *,
    ctx: RequestContext,
    resource_uri: str,
    recursive: bool,
) -> List[_MemoryRefMatch]:
    """Scan the current user's memory files for refs to ``resource_uri``.

    Args:
        ctx: Request context; also determines the user memory root.
        resource_uri: Resource URI being looked up.
        recursive: Passed to ``_resource_ref_matches``; when true, refs below
            ``resource_uri`` match too.

    Returns:
        One ``_MemoryRefMatch`` per matching ref (a memory file holding
        several matching refs yields several entries). An empty list when the
        memory tree cannot be listed.
    """
    viking_fs = self._get_viking_fs()
    memory_root = f"{canonical_user_root(ctx)}/memories"
    try:
        entries = await viking_fs.tree(
            memory_root,
            ctx=ctx,
            node_limit=1000000,
            level_limit=None,
        )
    except (NotFoundError, FileNotFoundError):
        # No memory tree means there is nothing to clean.
        return []
    except Exception as exc:
        # Still best-effort, but leave a trace: after a failed scan the
        # resource is deleted without its refs being cleaned.
        logger.warning(
            "Failed to list memories under %s while looking for refs to %s: %s",
            memory_root,
            resource_uri,
            exc,
        )
        return []

    # NOTE(review): presumably generated summary files -- confirm. They are
    # matched by file name so they are skipped at any depth, including
    # directly under the memory root.
    skipped_names = (".abstract.md", ".overview.md")
    matches: List[_MemoryRefMatch] = []
    for entry in entries:
        uri = entry.get("uri") or ""
        rel_path = entry.get("rel_path") or ""
        if entry.get("isDir") or not uri.endswith(".md"):
            continue
        if (
            uri.rsplit("/", 1)[-1] in skipped_names
            or rel_path.rsplit("/", 1)[-1] in skipped_names
        ):
            continue
        try:
            raw = await viking_fs.read_file(uri, ctx=ctx)
            mf = MemoryFileUtils.read(raw, uri=uri)
        except Exception as exc:
            # An unreadable memory may still hold a ref; make the skip visible.
            logger.warning("Skipping unreadable memory %s during ref scan: %s", uri, exc)
            continue
        matches.extend(
            _MemoryRefMatch(uri, mf, ref)
            for ref in self._coerce_resource_refs(mf.extra_fields.get("resource_refs"))
            if self._resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive)
        )
    return matches
@staticmethod
def _merge_resource_refs(existing: Any, new_ref: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Merge ``new_ref`` into the ref list stored on a memory file.

    A stored ref is treated as the same entry when it has the same ``source``
    and the same ``resource_uri`` ignoring trailing slashes (the comparison
    ``_resource_ref_matches`` uses elsewhere in this class). In that case the
    stored entry is updated with the truthy values of ``new_ref``; otherwise
    a copy of ``new_ref`` is appended.

    Args:
        existing: Raw ``resource_refs`` value; coerced with
            ``_coerce_resource_refs``.
        new_ref: Ref entry to merge. It is not mutated or aliased.

    Returns:
        A new list of ref dicts.
    """

    def _identity(ref: Dict[str, Any]) -> Any:
        # Trailing slashes do not distinguish resources.
        uri = ref.get("resource_uri")
        return uri.rstrip("/") if isinstance(uri, str) else uri

    refs = ResourceMemoryLinkService._coerce_resource_refs(existing)
    wanted_uri = _identity(new_ref)
    wanted_source = new_ref.get("source")
    for ref in refs:
        if _identity(ref) == wanted_uri and ref.get("source") == wanted_source:
            ref.update({k: v for k, v in new_ref.items() if v})
            return refs
    # Copy so later edits to the stored list cannot leak into the caller's dict.
    refs.append(dict(new_ref))
    return refs
candidates.append(reason_anchor) + for token in (reason or "").replace(",", " ").replace(",", " ").split(): + stripped = token.strip() + if stripped: + candidates.append(stripped) + candidates.extend(["资源", "resource", "Resource"]) + for candidate in dict.fromkeys(candidates): + if candidate and LinkRenderer._find_match_span(content, candidate): + return candidate + return None + + @staticmethod + def _extract_anchor_from_reason(reason: str) -> Optional[str]: + text = (reason or "").strip() + if not text: + return None + patterns = [ + r"^(?:这是一张|这是|这张|这个|用户上传了(?:一张|一个)?|上传了(?:一张|一个)?|新增了|添加了)?\s*(?P<anchor>[^,,。!?\n]{1,60}?)(?:的)?(?:照片|图片|截图|图像|文件|资源|文档|身份证|证件|资料)\s*$", + r"(?P<anchor>[^,,。!?\n]{1,60}?)(?:的)?(?:照片|图片|截图|图像|文件|资源|文档|身份证|证件|资料)", + ] + for pattern in patterns: + match = re.search(pattern, text) + if not match: + continue + anchor = (match.group("anchor") or "").strip() + anchor = re.sub(r"^(?:关于|有关|一张|一个)\s*", "", anchor).strip() + anchor = re.sub(r"(?:的|之)$", "", anchor).strip() + if anchor: + return anchor + return None + + @classmethod + def _link_resource_in_content( + cls, + content: str, + *, + resource_uri: str, + match_text: Optional[str], + allow_sentence_fallback: bool, + ) -> tuple[str, Optional[str]]: + content = content or "" + if not content or not resource_uri: + return content, None + if cls._content_links_resource(content, resource_uri): + return content, match_text + + if match_text: + span = cls._find_unlinked_match_span(content, match_text) + if span: + linked = cls._replace_span_with_link(content, span, resource_uri) + linked = cls._remove_redundant_visible_resource_uri(linked, resource_uri) + return linked, content[span[0] : span[1]] + + if allow_sentence_fallback: + span = cls._first_sentence_span(content, resource_uri) + if span: + linked = cls._replace_span_with_link(content, span, resource_uri) + linked = cls._remove_redundant_visible_resource_uri(linked, resource_uri) + return linked, content[span[0] : span[1]].strip() + + return content, 
match_text + + @staticmethod + def _content_links_resource(content: str, resource_uri: str) -> bool: + return bool( + re.search( + r"\[[^\]]+\]\(" + re.escape(resource_uri) + r"\)", + content or "", + ) + ) + + @classmethod + def _find_unlinked_match_span( + cls, + content: str, + match_text: str, + ) -> Optional[tuple[int, int]]: + span = LinkRenderer._find_match_span(content, match_text) + if not span: + return None + if cls._span_inside_markdown_link(content, span): + return None + return span + + @staticmethod + def _span_inside_markdown_link(content: str, span: tuple[int, int]) -> bool: + start, end = span + for match in re.finditer(r"\[[^\]]+\]\([^)]+\)", content or ""): + if start >= match.start() and end <= match.end(): + return True + return False + + @staticmethod + def _replace_span_with_link( + content: str, + span: tuple[int, int], + resource_uri: str, + ) -> str: + start, end = span + anchor = content[start:end] + return f"{content[:start]}[{anchor}]({resource_uri}){content[end:]}" + + @staticmethod + def _first_sentence_span(content: str, resource_uri: str) -> Optional[tuple[int, int]]: + match = re.search(r"\S", content or "") + if not match: + return None + start = match.start() + line_end = content.find("\n", start) + if line_end == -1: + line_end = len(content) + line = content[start:line_end] + punctuation = re.search(r"[。!?.!?]", line) + end = start + punctuation.end() if punctuation else line_end + sentence = content[start:end].strip() + if not sentence or resource_uri in sentence or len(sentence) > 160: + return None + if ResourceMemoryLinkService._span_inside_markdown_link(content, (start, end)): + return None + return start, end + + @staticmethod + def _remove_redundant_visible_resource_uri(content: str, resource_uri: str) -> str: + if not ResourceMemoryLinkService._content_links_resource(content, resource_uri): + return content + uri = ResourceMemoryLinkService._visible_resource_uri_pattern(resource_uri) + label = 
r"(?:resource\s+URI|资源\s*URI|资源地址|资源链接)" + patterns = [ + re.compile(rf"(?im)^[ \t]*(?:[-*]\s*)?{label}\s*[::]\s*{uri}[ \t]*(?:\r?\n|$)"), + re.compile(rf"\s*[,,;;]?\s*{label}\s*[::]\s*{uri}"), + re.compile(rf"\s*[::]\s*{uri}"), + ] + cleaned = content + for pattern in patterns: + cleaned = pattern.sub("", cleaned) + cleaned = re.sub(r"[ \t]+([。!?.!?,,;;])", r"\1", cleaned) + cleaned = re.sub(r"\n{3,}", "\n\n", cleaned) + return cleaned.strip() + + @staticmethod + def _visible_resource_uri_pattern(resource_uri: str) -> str: + markdown_escaped_chars = set(r"\`*_{}[]()#+-.!|") + return "".join( + rf"\\?{re.escape(char)}" if char in markdown_escaped_chars else re.escape(char) + for char in resource_uri + ) + + @staticmethod + def _infer_memory_type(memory_uri: str, memory_file: MemoryFile) -> str: + memory_type = ( + memory_file.memory_type + or memory_file.extra_fields.get("memory_type") + or "" + ) + if memory_type: + return str(memory_type) + parts = [part for part in VikingURI.normalize(memory_uri).split("/") if part] + try: + idx = parts.index("memories") + except ValueError: + return "" + if len(parts) > idx + 1: + return parts[idx + 1].replace(".md", "") + return "" diff --git a/openviking/service/resource_service.py b/openviking/service/resource_service.py index e55b68f6cf..3f8646fa53 100644 --- a/openviking/service/resource_service.py +++ b/openviking/service/resource_service.py @@ -52,6 +52,7 @@ if TYPE_CHECKING: from openviking.resource.watch_manager import WatchManager from openviking.resource.watch_scheduler import WatchScheduler + from openviking.service.resource_memory_link_service import ResourceMemoryLinkService logger = get_logger(__name__) @@ -73,12 +74,14 @@ def __init__( resource_processor: Optional[ResourceProcessor] = None, skill_processor: Optional[SkillProcessor] = None, watch_scheduler: Optional["WatchScheduler"] = None, + resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ): self._vikingdb = vikingdb self._viking_fs 
= viking_fs self._resource_processor = resource_processor self._skill_processor = skill_processor self._watch_scheduler = watch_scheduler + self._resource_memory_link_service = resource_memory_link_service self._background_tasks: set[asyncio.Task[Any]] = set() def set_dependencies( @@ -88,6 +91,7 @@ def set_dependencies( resource_processor: ResourceProcessor, skill_processor: SkillProcessor, watch_scheduler: Optional["WatchScheduler"] = None, + resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ) -> None: """Set dependencies (for deferred initialization).""" self._vikingdb = vikingdb @@ -95,6 +99,7 @@ def set_dependencies( self._resource_processor = resource_processor self._skill_processor = skill_processor self._watch_scheduler = watch_scheduler + self._resource_memory_link_service = resource_memory_link_service def _get_watch_manager(self) -> Optional["WatchManager"]: if not self._watch_scheduler: @@ -625,6 +630,12 @@ async def add_resource( logger.warning( f"[ResourceService] Failed to cancel watch task for {to}: {e}" ) + await self._link_resource_reason_memory( + result=result, + ctx=ctx, + reason=reason, + source_name=kwargs.get("source_name"), + ) if not wait: from openviking.service.task_tracker import get_task_tracker @@ -676,6 +687,33 @@ async def add_resource( get_request_wait_tracker().cleanup(telemetry_id) unregister_wait_telemetry(telemetry_id) + async def _link_resource_reason_memory( + self, + *, + result: Dict[str, Any], + ctx: RequestContext, + reason: str, + source_name: Optional[str], + ) -> None: + if not self._resource_memory_link_service: + return + if not (reason or "").strip(): + return + root_uri = result.get("root_uri") + if not root_uri: + return + try: + link_result = await self._resource_memory_link_service.on_resource_added( + ctx=ctx, + resource_uri=root_uri, + reason=reason, + source_name=source_name, + ) + result["memory_linking"] = link_result + except Exception as exc: + logger.warning("[ResourceService] 
Failed to link resource reason memory: %s", exc) + result.setdefault("warnings", []).append(f"Memory linking failed: {exc}") + async def _monitor_queue_processing( self, task_id: str, diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index cedacd7065..0350aecd7a 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -608,8 +608,16 @@ async def _apply_links_to_existing_files( viking_fs = self._get_viking_fs() if not viking_fs: return + from openviking.core.namespace import context_type_for_uri + upserted_uris = set(result.written_uris + result.edited_uris) - skip = upserted_uris | (deleted_uris or set()) + non_memory_endpoints = { + uri + for link in resolved_links + for uri in (link.from_uri, link.to_uri) + if context_type_for_uri(uri) != "memory" + } + skip = upserted_uris | (deleted_uris or set()) | non_memory_endpoints await write_stored_links(resolved_links, ctx, viking_fs, skip_uris=skip) async def _apply_delete(self, uri: str, ctx: RequestContext) -> None: @@ -669,7 +677,9 @@ async def _vectorize_memories( content = await viking_fs.read_file(uri, ctx=ctx) or "" mf = MemoryFileUtils.read(content, uri=uri) - abstract = mf.plain_content() + from openviking.session.memory.utils.link_renderer import LinkRenderer + + abstract = LinkRenderer.strip_all_links(mf.content or "") embedding_text = abstract memory_type = uri_memory_type_map.get(uri) diff --git a/openviking/session/memory/utils/link_renderer.py b/openviking/session/memory/utils/link_renderer.py index ce2695083d..55d2a0d012 100644 --- a/openviking/session/memory/utils/link_renderer.py +++ b/openviking/session/memory/utils/link_renderer.py @@ -2,7 +2,6 @@ from typing import Dict, List, Optional from openviking.core.namespace import uri_parts -from openviking.session.memory.dataclass import StoredLink class LinkRenderer: @@ -106,6 +105,12 @@ def _replace_link(m: re.Match) -> str: return 
LinkRenderer._RELATIVE_LINK_RE.sub(_replace_link, content) + @staticmethod + def strip_all_links(content: str) -> str: + """Remove markdown links regardless of target scheme, keeping only link text.""" + + return LinkRenderer._RELATIVE_LINK_RE.sub(lambda m: m.group("text"), content) + @staticmethod def relative_path(source_uri: str, target_uri: str) -> Optional[str]: """Compute a relative path from source_uri to target_uri in the viking:// namespace. @@ -123,7 +128,7 @@ def relative_path(source_uri: str, target_uri: str) -> Optional[str]: return None common = 0 - for s, t in zip(src, tgt): + for s, t in zip(src, tgt, strict=False): if s == t: common += 1 else: diff --git a/openviking/storage/content_write.py b/openviking/storage/content_write.py index 184b88d1a6..5a9248e9d5 100644 --- a/openviking/storage/content_write.py +++ b/openviking/storage/content_write.py @@ -5,7 +5,9 @@ from __future__ import annotations import os -from typing import Any, Dict, Optional +import re +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional, Sequence from openviking.core.namespace import NamespaceShapeError, canonicalize_uri, context_type_for_uri from openviking.resource.watch_storage import is_watch_task_control_uri @@ -33,6 +35,14 @@ _CREATE_ALLOWED_EXTENSIONS = frozenset( {".md", ".txt", ".json", ".yaml", ".yml", ".toml", ".py", ".js", ".ts"} ) +_CONTENT_WRITE_RESOURCE_REF_SOURCE = "content.write" +_MARKDOWN_RESOURCE_LINK_RE = re.compile(r"\[([^\]\n]+)\]\((viking://resources/[^)\s]+)\)") +_RESOURCE_URI_RE = re.compile(r"viking://resources/[^\s<>\]\)\"']+") +_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL) +_INLINE_CODE_RE = re.compile(r"`[^`\n]+`") +_TRAILING_URI_PUNCTUATION = ".,;:!?,。;:!?" 
+_SENTENCE_BOUNDARIES = "。!?.!?\n" +_MAX_LINKIFIED_SENTENCE_CHARS = 160 class ContentWriteCoordinator: @@ -344,12 +354,19 @@ async def _write_in_place( mode: str, ctx: RequestContext, ) -> None: - if mode == "replace" and context_type_for_uri(uri) == "memory": - existing_raw = await self._viking_fs.read_file(uri, ctx=ctx) - mf = MemoryFileUtils.read(existing_raw, uri=uri) - mf.content = content - content = MemoryFileUtils.write(mf) - await self._viking_fs.write_file(uri, content, ctx=ctx) + if context_type_for_uri(uri) == "memory": + if mode == "replace": + existing_raw = await self._viking_fs.read_file(uri, ctx=ctx) + mf = MemoryFileUtils.read(existing_raw, uri=uri) + mf.content = content + elif mode == "append": + existing_raw = await self._viking_fs.read_file(uri, ctx=ctx) + mf = MemoryFileUtils.read(existing_raw, uri=uri) + mf.content = mf.content + content + else: + mf = MemoryFileUtils.read(content, uri=uri) + self._sync_memory_resource_refs(mf) + await self._viking_fs.write_file(uri, MemoryFileUtils.write(mf), ctx=ctx) return if mode == "append": @@ -361,6 +378,179 @@ async def _write_in_place( return await self._viking_fs.write_file(uri, content, ctx=ctx) + def _sync_memory_resource_refs(self, mf) -> None: + code_spans = self._protected_code_spans(mf.content) + markdown_refs, markdown_spans = self._extract_markdown_resource_refs( + mf.content, + code_spans, + ) + mf.content, bare_refs = self._linkify_bare_resource_uris( + mf.content, + code_spans + markdown_spans, + ) + self._merge_content_write_resource_refs(mf, markdown_refs + bare_refs) + + @staticmethod + def _protected_code_spans(content: str) -> List[tuple[int, int]]: + spans = [(match.start(), match.end()) for match in _CODE_BLOCK_RE.finditer(content or "")] + spans.extend((match.start(), match.end()) for match in _INLINE_CODE_RE.finditer(content or "")) + return spans + + @classmethod + def _extract_markdown_resource_refs( + cls, + content: str, + protected_spans: Sequence[tuple[int, int]], + ) -> 
tuple[List[Dict[str, Any]], List[tuple[int, int]]]: + refs: List[Dict[str, Any]] = [] + link_spans: List[tuple[int, int]] = [] + for match in _MARKDOWN_RESOURCE_LINK_RE.finditer(content or ""): + if cls._overlaps_spans(match.start(), match.end(), protected_spans): + continue + label = match.group(1).strip() + resource_uri = cls._trim_resource_uri(match.group(2).strip()) + link_spans.append((match.start(), match.end())) + refs.append( + { + "resource_uri": resource_uri, + "match_text": label or None, + } + ) + return refs, link_spans + + @classmethod + def _linkify_bare_resource_uris( + cls, + content: str, + protected_spans: Sequence[tuple[int, int]], + ) -> tuple[str, List[Dict[str, Any]]]: + refs: List[Dict[str, Any]] = [] + updated = content or "" + covered_start = len(updated) + 1 + + matches = list(_RESOURCE_URI_RE.finditer(updated)) + for match in reversed(matches): + resource_uri = cls._trim_resource_uri(match.group(0)) + if not resource_uri: + continue + start = match.start() + end = start + len(resource_uri) + if cls._overlaps_spans(start, end, protected_spans): + continue + + refs.append({"resource_uri": resource_uri}) + sentence_span = cls._previous_sentence_span(updated, start) + if not sentence_span: + continue + sentence_start, sentence_end = sentence_span + if end > covered_start: + continue + anchor = updated[sentence_start:sentence_end] + if "viking://resources/" in anchor or "](" in anchor: + continue + refs[-1]["match_text"] = anchor + replacement = f"[{anchor}]({resource_uri})" + updated = updated[:sentence_start] + replacement + updated[end:] + covered_start = sentence_start + + refs.reverse() + return updated, refs + + @staticmethod + def _previous_sentence_span(content: str, uri_start: int) -> Optional[tuple[int, int]]: + sentence_end = uri_start + while sentence_end > 0 and content[sentence_end - 1].isspace(): + sentence_end -= 1 + if sentence_end <= 0: + return None + + boundary_search_end = sentence_end + if content[sentence_end - 1] in 
_SENTENCE_BOUNDARIES: + boundary_search_end = sentence_end - 1 + sentence_start = 0 + for idx in range(boundary_search_end - 1, -1, -1): + if content[idx] in _SENTENCE_BOUNDARIES: + sentence_start = idx + 1 + break + while sentence_start < sentence_end and content[sentence_start].isspace(): + sentence_start += 1 + + anchor = content[sentence_start:sentence_end] + if not anchor or len(anchor) > _MAX_LINKIFIED_SENTENCE_CHARS: + return None + return sentence_start, sentence_end + + @staticmethod + def _trim_resource_uri(resource_uri: str) -> str: + return (resource_uri or "").rstrip(_TRAILING_URI_PUNCTUATION) + + @staticmethod + def _overlaps_spans( + start: int, + end: int, + protected_spans: Sequence[tuple[int, int]], + ) -> bool: + return any(start < span_end and end > span_start for span_start, span_end in protected_spans) + + @classmethod + def _merge_content_write_resource_refs(cls, mf, refs: Sequence[Dict[str, Any]]) -> None: + visible_refs: Dict[str, Dict[str, Any]] = {} + for ref in refs: + resource_uri = ref.get("resource_uri") + if not isinstance(resource_uri, str) or not resource_uri: + continue + existing = visible_refs.setdefault(resource_uri, {"resource_uri": resource_uri}) + match_text = ref.get("match_text") + if match_text and not existing.get("match_text"): + existing["match_text"] = match_text + + existing_refs = cls._coerce_resource_refs(mf.extra_fields.get("resource_refs")) + merged: List[Dict[str, Any]] = [] + seen_resource_uris: set[str] = set() + created_at = datetime.now(timezone.utc).isoformat() + for existing_ref in existing_refs: + resource_uri = existing_ref.get("resource_uri") + if not isinstance(resource_uri, str) or not resource_uri: + merged.append(existing_ref) + continue + visible_ref = visible_refs.get(resource_uri) + if ( + existing_ref.get("source") == _CONTENT_WRITE_RESOURCE_REF_SOURCE + and visible_ref is None + ): + continue + if visible_ref and existing_ref.get("source") == _CONTENT_WRITE_RESOURCE_REF_SOURCE: + if 
visible_ref.get("match_text"): + existing_ref["match_text"] = visible_ref["match_text"] + existing_ref.setdefault("created_at", created_at) + merged.append(existing_ref) + seen_resource_uris.add(resource_uri) + + for resource_uri, visible_ref in visible_refs.items(): + if resource_uri in seen_resource_uris: + continue + ref = { + "resource_uri": resource_uri, + "source": _CONTENT_WRITE_RESOURCE_REF_SOURCE, + "created_at": created_at, + } + if visible_ref.get("match_text"): + ref["match_text"] = visible_ref["match_text"] + merged.append(ref) + + if merged: + mf.extra_fields["resource_refs"] = merged + else: + mf.extra_fields.pop("resource_refs", None) + + @staticmethod + def _coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: + if isinstance(value, list): + return [dict(item) for item in value if isinstance(item, dict)] + if isinstance(value, dict): + return [dict(value)] + return [] + async def _enqueue_semantic_refresh( self, *, diff --git a/tests/server/test_content_write_service.py b/tests/server/test_content_write_service.py index 6a8927c902..406212d343 100644 --- a/tests/server/test_content_write_service.py +++ b/tests/server/test_content_write_service.py @@ -119,6 +119,67 @@ async def test_memory_append_preserves_metadata(service): assert stored_result.extra_fields == expected_mf.extra_fields +@pytest.mark.asyncio +async def test_memory_write_adds_resource_refs_for_markdown_resource_link(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_uri = f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg_1" + content = f"用户上传了一张[越前龙马]({resource_uri})的照片" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write(memory_uri, content=content, ctx=ctx, mode="replace") + + stored = await service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + refs = 
mf.extra_fields["resource_refs"] + assert mf.content == content + assert refs[0]["resource_uri"] == resource_uri + assert refs[0]["source"] == "content.write" + assert refs[0]["match_text"] == "越前龙马" + assert mf.links == [] + + +@pytest.mark.asyncio +async def test_memory_write_linkifies_bare_resource_uri_previous_sentence(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_uri = f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg_1" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write( + memory_uri, + content=f"用户上传了一张越前龙马的照片 {resource_uri}", + ctx=ctx, + mode="replace", + ) + + stored = await service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + assert mf.content == f"[用户上传了一张越前龙马的照片]({resource_uri})" + refs = mf.extra_fields["resource_refs"] + assert refs[0]["resource_uri"] == resource_uri + assert refs[0]["source"] == "content.write" + assert refs[0]["match_text"] == "用户上传了一张越前龙马的照片" + assert mf.links == [] + + +@pytest.mark.asyncio +async def test_memory_write_ignores_resource_uri_in_inline_code(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_uri = f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg_1" + content = f"调试示例:`{resource_uri}`" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write(memory_uri, content=content, ctx=ctx, mode="replace") + + stored = await service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + assert mf.content == content + assert "resource_refs" not in mf.extra_fields + assert mf.links == [] + + class _FakeHandle: def __init__(self, handle_id: str): self.id = handle_id diff --git a/tests/server/test_filesystem_router.py 
b/tests/server/test_filesystem_router.py new file mode 100644 index 0000000000..8f16c8ff06 --- /dev/null +++ b/tests/server/test_filesystem_router.py @@ -0,0 +1,35 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""Filesystem router tests.""" + +from types import SimpleNamespace + +import pytest + +from openviking.server.identity import RequestContext, Role +from openviking.server.routers import filesystem +from openviking_cli.session.user_id import UserIdentifier + + +@pytest.mark.asyncio +async def test_rm_preserves_memory_cleanup(monkeypatch): + cleanup = {"status": "success", "memory_uris": ["viking://user/alice/memories/entities/a.md"]} + + async def fake_rm(uri, ctx=None, recursive=False): + return {"estimated_deleted_count": 1, "memory_cleanup": cleanup} + + monkeypatch.setattr( + filesystem, + "get_service", + lambda: SimpleNamespace(fs=SimpleNamespace(rm=fake_rm)), + ) + + response = await filesystem.rm( + uri="viking://resources/id_card.pdf", + recursive=True, + _ctx=RequestContext(user=UserIdentifier("acct", "alice"), role=Role.USER), + ) + + assert response.result["uri"] == "viking://resources/id_card.pdf" + assert response.result["estimated_deleted_count"] == 1 + assert response.result["memory_cleanup"] == cleanup diff --git a/tests/service/test_resource_memory_link_service.py b/tests/service/test_resource_memory_link_service.py new file mode 100644 index 0000000000..e4dc481564 --- /dev/null +++ b/tests/service/test_resource_memory_link_service.py @@ -0,0 +1,358 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
+# SPDX-License-Identifier: AGPL-3.0 +"""Tests for resource-memory linking service.""" + +from unittest.mock import AsyncMock + +import pytest + +from openviking.server.identity import RequestContext, Role +from openviking.service.resource_memory_link_service import ( + ResourceMemoryLinkService, + _ResourceLinkingProvider, +) +from openviking.session.memory.dataclass import MemoryFile +from openviking.session.memory.memory_updater import MemoryUpdateResult +from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking_cli.session.user_id import UserIdentifier + + +class _FakeVikingFS: + def __init__(self, store): + self.store = store + + async def read_file(self, uri, ctx=None): + return self.store[uri] + + async def write_file(self, uri, content, ctx=None): + self.store[uri] = content + + async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): + prefix = uri.rstrip("/") + "/" + return [ + { + "uri": item_uri, + "rel_path": item_uri.removeprefix(prefix), + "isDir": False, + } + for item_uri in self.store + if item_uri.startswith(prefix) + ] + + +class _ReadFailVikingFS: + async def read_file(self, uri, ctx=None): + raise RuntimeError("storage unavailable") + + async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): + memory_uri = "viking://user/alice/memories/entities/wang.md" + return [{"uri": memory_uri, "rel_path": "entities/wang.md", "isDir": False}] + + +@pytest.fixture +def request_context(): + return RequestContext( + user=UserIdentifier("acct", "alice"), + role=Role.USER, + ) + + +@pytest.mark.asyncio +async def test_append_resource_refs_stores_only_memory_metadata(request_context): + memory_uri = "viking://user/alice/memories/entities/wang.md" + resource_uri = "viking://resources/id_card.pdf" + store = {memory_uri: "王大锤的身份证资料。\n"} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + + await service._append_resource_refs( + memory_uris=[memory_uri], + 
resource_uri=resource_uri, + reason="这是王大锤的身份证", + ctx=request_context, + ) + + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.extra_fields["resource_refs"][0]["resource_uri"] == resource_uri + assert mf.extra_fields["resource_refs"][0]["source"] == "add_resource.reason" + assert mf.extra_fields["resource_refs"][0]["match_text"] == "王大锤" + assert mf.links == [] + assert f"[王大锤]({resource_uri})" in store[memory_uri] + assert resource_uri not in store + + +def test_resource_linking_provider_detects_language_from_reason_not_resource_uri(): + provider = _ResourceLinkingProvider( + resource_uri="viking://resources/images/2026/06/10/yueqian_jpeg", + reason="这是越前龙马的照片", + source_name="yueqian.jpeg", + ) + + assert provider.get_output_language() == "zh-CN" + + +def test_resource_linking_provider_exposes_resource_uri_only_as_metadata(): + resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg" + provider = _ResourceLinkingProvider( + resource_uri=resource_uri, + reason="这是越前龙马的照片", + source_name="yueqian.jpeg", + ) + + message_text = "\n".join( + part.text + for message in provider.messages + for part in message.parts + if getattr(part, "text", None) + ) + + instruction = provider.instruction() + assert resource_uri in instruction + assert resource_uri in provider._build_conversation_message()["content"] + assert resource_uri in provider.get_conversation_text() + assert resource_uri in message_text + assert "include the exact Resource URI in the visible memory content" not in instruction + assert "Use the Resource URI only as resource identity metadata" in instruction + assert "Do NOT include raw resource URIs" in instruction + + +def test_resource_linking_prompt_prefers_natural_sentence_over_terse_label(): + provider = _ResourceLinkingProvider( + resource_uri="viking://resources/reports/gdp_pdf", + reason="这个 PDF 第 65 页的人均 GDP 数据应为 4 万", + source_name="gdp.pdf", + ) + + instruction = provider.instruction() + assert "Create/edit visible 
memory as durable natural sentences" in instruction + assert "user intent/judgment" in instruction + assert "rewrite terse resource labels" in instruction + assert 'reason "page 3 total should be 42"' in instruction + assert '"User said page 3 total should be 42"' in instruction + assert "merge with it" in instruction + assert "only the newest resource" in instruction + assert "enumerate/count resources" in instruction + + +@pytest.mark.asyncio +async def test_append_resource_refs_linkifies_memory_entity_name_and_removes_plain_uri( + request_context, +): + memory_uri = "viking://user/ryoma/memories/entities/fictional_character/越前龙马.md" + resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg" + raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=f"用户上传了一张越前龙马的照片,资源 URI:{resource_uri}", + extra_fields={ + "category": "fictional_character", + "name": "越前龙马", + "memory_type": "entities", + }, + ) + ) + store = {memory_uri: raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + + await service._append_resource_refs( + memory_uris=[memory_uri], + resource_uri=resource_uri, + reason="这是越前龙马的照片", + ctx=request_context, + ) + + written = store[memory_uri] + assert f"[越前龙马]({resource_uri})" in written + assert f"资源 URI:{resource_uri}" not in written + mf = MemoryFileUtils.read(written, uri=memory_uri) + assert mf.extra_fields["resource_refs"][0]["match_text"] == "越前龙马" + assert mf.links == [] + + +@pytest.mark.asyncio +async def test_append_resource_refs_removes_colon_visible_uri_with_markdown_escape( + request_context, +): + memory_uri = "viking://user/ryoma/memories/entities/fictional_character/越前龙马.md" + resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg" + visible_uri = "viking://resources/images/2026/06/10/yueqian\\_jpeg" + raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=f"- 越前龙马的照片资源:{visible_uri}", + extra_fields={ + "category": "fictional_character", + "name": "越前龙马", + 
"memory_type": "entities", + }, + ) + ) + store = {memory_uri: raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + + await service._append_resource_refs( + memory_uris=[memory_uri], + resource_uri=resource_uri, + reason="这是越前龙马的照片", + ctx=request_context, + ) + + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.content == f"- [越前龙马]({resource_uri})的照片资源" + assert visible_uri not in mf.content + + +@pytest.mark.asyncio +async def test_append_resource_refs_falls_back_to_first_sentence_when_anchor_missing( + request_context, +): + memory_uri = "viking://user/ryoma/memories/entities/fictional_character/越前龙马.md" + resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg" + store = {memory_uri: "用户上传了一张角色照片。后续句子不应被链接。"} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + + await service._append_resource_refs( + memory_uris=[memory_uri], + resource_uri=resource_uri, + reason="这是越前龙马的照片", + ctx=request_context, + ) + + written = store[memory_uri] + assert f"[用户上传了一张角色照片。]({resource_uri})" in written + assert "后续句子不应被链接。" in written + mf = MemoryFileUtils.read(written, uri=memory_uri) + assert mf.extra_fields["resource_refs"][0]["match_text"] == "用户上传了一张角色照片。" + + +@pytest.mark.asyncio +async def test_find_referencing_memories_uses_memory_refs(request_context): + memory_uri = "viking://user/alice/memories/entities/wang.md" + resource_uri = "viking://resources/docs/id_card.pdf" + raw = ( + "王大锤资料。\n\n" + "" + ) + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS({memory_uri: raw})) + + matches = await service._find_referencing_memories( + ctx=request_context, + resource_uri="viking://resources/docs", + recursive=True, + ) + + assert len(matches) == 1 + assert matches[0].memory_uri == memory_uri + assert matches[0].resource_ref["resource_uri"] == resource_uri + + +@pytest.mark.asyncio +async def test_before_resource_delete_removes_refs_when_cleanup_has_no_changes(request_context): + 
memory_uri = "viking://user/alice/memories/entities/wang.md" + resource_uri = "viking://resources/id_card.pdf" + raw = ( + "王大锤资料。\n\n" + "" + ) + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS({memory_uri: raw})) + service._cleanup_memory_reference = AsyncMock(return_value=MemoryUpdateResult()) + + result = await service.before_resource_delete( + ctx=request_context, + resource_uri=resource_uri, + ) + + assert result["status"] == "success" + mf = MemoryFileUtils.read(service._get_viking_fs().store[memory_uri], uri=memory_uri) + assert "resource_refs" not in mf.extra_fields + + +@pytest.mark.asyncio +async def test_cleanup_memory_reference_does_not_introduce_schema_metadata(request_context): + memory_uri = "viking://user/ryoma/memories/entities/动漫角色/不二周助-write-test3.md" + resource_uri = "viking://resources/images/2026/06/10/不二周助_jpeg" + original_raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=f"今天是清明节。[用户保存了一张不二周助的照片]({resource_uri})", + extra_fields={ + "resource_refs": [ + { + "resource_uri": resource_uri, + "source": "content.write", + } + ] + }, + ) + ) + store = {memory_uri: original_raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + service._run_extract_loop = AsyncMock(return_value=(object(), object(), object())) + + async def fake_apply_memory_operations(**kwargs): + store[memory_uri] = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content="今天是清明节。", + memory_type="entities", + extra_fields={ + "category": "anime_character", + "name": "不二周助", + "user_id": "ryoma", + "resource_refs": [ + { + "resource_uri": resource_uri, + "source": "content.write", + } + ], + }, + ) + ) + result = MemoryUpdateResult() + result.add_edited(memory_uri) + return result + + service._apply_memory_operations = AsyncMock(side_effect=fake_apply_memory_operations) + + result = await service._cleanup_memory_reference( + ctx=request_context, + memory_uri=memory_uri, + memory_file=MemoryFileUtils.read(original_raw, 
uri=memory_uri), + resource_uri=resource_uri, + reason="", + ) + + assert result.edited_uris == [memory_uri] + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.content == "今天是清明节。" + assert mf.extra_fields == {} + assert mf.memory_type is None + + +@pytest.mark.asyncio +async def test_assert_resource_unlinked_propagates_non_not_found_errors(request_context): + service = ResourceMemoryLinkService(viking_fs=_ReadFailVikingFS()) + + with pytest.raises(RuntimeError, match="storage unavailable"): + await service._assert_resource_unlinked( + "viking://user/alice/memories/entities/wang.md", + "viking://resources/id_card.pdf", + request_context, + ) diff --git a/tests/session/memory/test_memory_updater.py b/tests/session/memory/test_memory_updater.py index 82161d7667..d7ad167eb2 100644 --- a/tests/session/memory/test_memory_updater.py +++ b/tests/session/memory/test_memory_updater.py @@ -392,6 +392,75 @@ async def mock_write_file(uri, content, **kwargs): assert [link["to_uri"] for link in caroline["backlinks"]] == [caroline_uri] assert melanie.get("backlinks", []) == [] + @pytest.mark.asyncio + async def test_apply_operations_does_not_write_backlinks_to_resource_targets(self): + memory_uri = "viking://user/alice/memories/entities/wang.md" + resource_uri = "viking://resources/id_card.pdf" + + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + filename_template="{{ name }}.md", + fields=[], + overview_template="overview", + ) + registry = MagicMock() + registry.get.return_value = schema + + store = {} + mock_viking_fs = MagicMock() + + async def mock_read_file(uri, **kwargs): + if uri == resource_uri: + raise AssertionError("resource target should not be read as a memory file") + return store.get(uri) + + async def mock_write_file(uri, content, **kwargs): + if uri == resource_uri: + raise AssertionError("resource target should not be written as a memory 
file") + store[uri] = content + + mock_viking_fs.read_file = mock_read_file + mock_viking_fs.write_file = mock_write_file + + updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=mock_viking_fs) + updater._vectorize_memories = AsyncMock() + updater.generate_overview = AsyncMock() + + operations = ResolvedOperations( + upsert_operations=[ + ResolvedOperation( + memory_fields={ + "name": "王大锤", + "content": "王大锤的身份证资料见资源。", + }, + memory_type="entities", + uris=[memory_uri], + page_id=100, + ) + ], + delete_file_contents=[], + errors=[], + resolved_links=[ + StoredLink( + from_uri=memory_uri, + to_uri=resource_uri, + link_type="references_resource", + match_text="资源", + ) + ], + ) + + ctx = RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.apply_operations(operations=operations, ctx=ctx) + + memory = parse_memory_file_with_fields(store[memory_uri]) + assert memory["links"][0]["to_uri"] == resource_uri + assert resource_uri not in store + # The TestApplyWriteWithContentInFields tests are outdated because WriteOp no longer exists # The _apply_write method now accepts any flat model (dict or Pydantic model) that diff --git a/tests/test_link_renderer.py b/tests/test_link_renderer.py index e4c03c737f..429a25438b 100644 --- a/tests/test_link_renderer.py +++ b/tests/test_link_renderer.py @@ -339,6 +339,11 @@ def test_no_links(self): result = LinkRenderer.strip_links(content) assert result == content + def test_strip_all_links_removes_viking_uri_targets_for_embedding(self): + content = "用户上传了一张[越前龙马](viking://resources/images/yueqian_jpeg)的照片。" + result = LinkRenderer.strip_all_links(content) + assert result == "用户上传了一张越前龙马的照片。" + class TestRoundTrip: def test_render_then_strip(self): From e587d1058fbd2abcbad32f7d0f95c0bb321016dc Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Wed, 10 Jun 2026 20:32:45 +0800 Subject: [PATCH 02/19] =?UTF-8?q?ov=20write,=20rm=20=E6=9B=B4=E6=96=B0=20.?= 
=?UTF-8?q?overview?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/ov_cli/src/client.rs | 14 +- crates/ov_cli/src/commands/filesystem.rs | 4 +- crates/ov_cli/src/handlers.rs | 19 ++- crates/ov_cli/src/help_ui.rs | 17 ++- crates/ov_cli/src/main.rs | 13 +- crates/ov_cli/src/tui/app.rs | 2 +- openviking/async_client.py | 10 +- openviking/client/local.py | 16 ++- openviking/server/routers/filesystem.py | 10 +- openviking/service/core.py | 1 + openviking/service/fs_service.py | 120 +++++++++++++++- openviking/session/memory/memory_updater.py | 112 ++++++++++++++- openviking/storage/content_write.py | 125 +++++++++-------- openviking/sync_client.py | 10 +- openviking_cli/client/base.py | 8 +- openviking_cli/client/http.py | 13 +- openviking_cli/client/sync_http.py | 10 +- tests/server/conftest.py | 10 +- tests/server/test_content_write_service.py | 148 ++++++++++++-------- tests/server/test_request_wait_tracking.py | 7 +- 20 files changed, 515 insertions(+), 154 deletions(-) diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index 6f228e68fa..1b611fa53e 100644 --- a/crates/ov_cli/src/client.rs +++ b/crates/ov_cli/src/client.rs @@ -353,11 +353,21 @@ impl HttpClient { self.post("/api/v1/fs/mkdir", &body).await } - pub async fn rm(&self, uri: &str, recursive: bool) -> Result { - let params = vec![ + pub async fn rm( + &self, + uri: &str, + recursive: bool, + wait: bool, + timeout: Option, + ) -> Result { + let mut params = vec![ ("uri".to_string(), uri.to_string()), ("recursive".to_string(), recursive.to_string()), + ("wait".to_string(), wait.to_string()), ]; + if let Some(timeout) = timeout { + params.push(("timeout".to_string(), timeout.to_string())); + } self.delete("/api/v1/fs", ¶ms).await } diff --git a/crates/ov_cli/src/commands/filesystem.rs b/crates/ov_cli/src/commands/filesystem.rs index 9c8b8a168d..9fa0a333ae 100644 --- a/crates/ov_cli/src/commands/filesystem.rs +++ 
b/crates/ov_cli/src/commands/filesystem.rs @@ -376,10 +376,12 @@ pub async fn rm( client: &HttpClient, uri: &str, recursive: bool, + wait: bool, + timeout: Option, output_format: OutputFormat, compact: bool, ) -> Result<()> { - let result = client.rm(uri, recursive).await?; + let result = client.rm(uri, recursive, wait, timeout).await?; let message = if let Some(count) = result .get("estimated_deleted_count") diff --git a/crates/ov_cli/src/handlers.rs b/crates/ov_cli/src/handlers.rs index f1e9d10390..10390b5f13 100644 --- a/crates/ov_cli/src/handlers.rs +++ b/crates/ov_cli/src/handlers.rs @@ -1318,9 +1318,24 @@ pub async fn handle_mkdir(uri: String, description: Option, ctx: CliCont .await } -pub async fn handle_rm(uri: String, recursive: bool, ctx: CliContext) -> Result<()> { +pub async fn handle_rm( + uri: String, + recursive: bool, + wait: bool, + timeout: Option, + ctx: CliContext, +) -> Result<()> { let client = ctx.get_client(); - commands::filesystem::rm(&client, &uri, recursive, ctx.output_format, ctx.compact).await + commands::filesystem::rm( + &client, + &uri, + recursive, + wait, + timeout, + ctx.output_format, + ctx.compact, + ) + .await } pub async fn handle_mv(from_uri: String, to_uri: String, ctx: CliContext) -> Result<()> { diff --git a/crates/ov_cli/src/help_ui.rs b/crates/ov_cli/src/help_ui.rs index fc7602dc7e..c246c9162d 100644 --- a/crates/ov_cli/src/help_ui.rs +++ b/crates/ov_cli/src/help_ui.rs @@ -598,7 +598,7 @@ const COMMAND_HELP_SPECS: &[CommandHelpSpec] = &[ CommandHelpSpec { path: &["rm"], purpose: "Remove a resource from OpenViking.", - usage: "ov rm [--recursive]", + usage: "ov rm [--recursive] [--wait]", examples: &[ HelpItem { label: "ov rm viking://scratch/old-note.md", @@ -608,6 +608,10 @@ const COMMAND_HELP_SPECS: &[CommandHelpSpec] = &[ label: "ov rm viking://scratch --recursive", description: "Remove a directory subtree.", }, + HelpItem { + label: "ov rm viking://resources/images/foo --recursive --wait", + description: "Remove a 
subtree and wait for generated overviews to refresh.", + }, ], arguments: &[HelpItem { label: "", @@ -617,7 +621,16 @@ const COMMAND_HELP_SPECS: &[CommandHelpSpec] = &[ label: "-r, --recursive", description: "Required for directory/subtree removal.", }], - advanced_options: &[], + advanced_options: &[ + HelpItem { + label: "--wait", + description: "Wait for semantic refresh after deletion.", + }, + HelpItem { + label: "--timeout ", + description: "Maximum time to wait with --wait.", + }, + ], subcommands: &[], next_steps: &[ HelpItem { diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index 5ec672ca6e..6875050e16 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -347,6 +347,12 @@ enum Commands { /// Remove recursively #[arg(short, long)] recursive: bool, + /// Wait until semantic refresh is complete + #[arg(long)] + wait: bool, + /// Wait timeout in seconds (only used with --wait) + #[arg(long)] + timeout: Option, }, /// [Data] Move or rename resource #[command(alias = "rename")] @@ -2410,7 +2416,12 @@ async fn main() { level_limit, } => handlers::handle_tree(uri, abs_limit, all, node_limit, level_limit, ctx).await, Commands::Mkdir { uri, description } => handlers::handle_mkdir(uri, description, ctx).await, - Commands::Rm { uri, recursive } => handlers::handle_rm(uri, recursive, ctx).await, + Commands::Rm { + uri, + recursive, + wait, + timeout, + } => handlers::handle_rm(uri, recursive, wait, timeout, ctx).await, Commands::Mv { from_uri, to_uri } => handlers::handle_mv(from_uri, to_uri, ctx).await, Commands::Stat { uri } => handlers::handle_stat(uri, ctx).await, Commands::AddMemory { content } => handlers::handle_add_memory(content, ctx).await, diff --git a/crates/ov_cli/src/tui/app.rs b/crates/ov_cli/src/tui/app.rs index 43f99051a0..fd0e0f0fb1 100644 --- a/crates/ov_cli/src/tui/app.rs +++ b/crates/ov_cli/src/tui/app.rs @@ -624,7 +624,7 @@ impl App { } }; - match client.rm(&selected_uri, is_dir).await { + match 
client.rm(&selected_uri, is_dir, false, None).await { Ok(_) => { self.set_status_message(format!("Deleted: {}", selected_uri)); diff --git a/openviking/async_client.py b/openviking/async_client.py index 48a2f00b4e..1e675ad923 100644 --- a/openviking/async_client.py +++ b/openviking/async_client.py @@ -496,10 +496,16 @@ async def ls(self, uri: str, **kwargs) -> List[Any]: show_all_hidden=show_all_hidden, ) - async def rm(self, uri: str, recursive: bool = False) -> None: + async def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource""" await self._ensure_initialized() - await self._client.rm(uri, recursive=recursive) + await self._client.rm(uri, recursive=recursive, wait=wait, timeout=timeout) async def grep( self, diff --git a/openviking/client/local.py b/openviking/client/local.py index 2d7744d4f1..e03743cef7 100644 --- a/openviking/client/local.py +++ b/openviking/client/local.py @@ -231,9 +231,21 @@ async def mkdir(self, uri: str, description: Optional[str] = None) -> None: """Create directory.""" await self._service.fs.mkdir(uri, ctx=self._ctx, description=description) - async def rm(self, uri: str, recursive: bool = False) -> None: + async def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource.""" - await self._service.fs.rm(uri, ctx=self._ctx, recursive=recursive) + await self._service.fs.rm( + uri, + ctx=self._ctx, + recursive=recursive, + wait=wait, + timeout=timeout, + ) async def mv(self, from_uri: str, to_uri: str) -> None: """Move resource.""" diff --git a/openviking/server/routers/filesystem.py b/openviking/server/routers/filesystem.py index d9f3692986..210d560131 100644 --- a/openviking/server/routers/filesystem.py +++ b/openviking/server/routers/filesystem.py @@ -149,6 +149,8 @@ async def mkdir( async def rm( uri: str = Query(..., description="Viking URI"), recursive: bool = 
Query(False, description="Remove recursively"), + wait: bool = Query(False, description="Wait for semantic refresh to complete"), + timeout: Optional[float] = Query(None, description="Wait timeout in seconds"), _ctx: RequestContext = Depends(get_request_context), ): """Remove resource.""" @@ -156,7 +158,7 @@ async def rm( # Resolve path variables uri = resolve_path_variables(uri) try: - result = await service.fs.rm(uri, ctx=_ctx, recursive=recursive) + result = await service.fs.rm(uri, ctx=_ctx, recursive=recursive, wait=wait, timeout=timeout) except AGFSNotFoundError: raise NotFoundError(uri, "file") except AGFSClientError as e: @@ -175,6 +177,12 @@ async def rm( response_result["estimated_deleted_count"] = result["estimated_deleted_count"] if isinstance(result, dict) and "memory_cleanup" in result: response_result["memory_cleanup"] = result["memory_cleanup"] + if isinstance(result, dict) and "semantic_root_uri" in result: + response_result["semantic_root_uri"] = result["semantic_root_uri"] + if isinstance(result, dict) and "semantic_status" in result: + response_result["semantic_status"] = result["semantic_status"] + if isinstance(result, dict) and "queue_status" in result: + response_result["queue_status"] = result["queue_status"] return Response(status="ok", result=response_result) diff --git a/openviking/service/core.py b/openviking/service/core.py index 709d6bab85..d44db32fc9 100644 --- a/openviking/service/core.py +++ b/openviking/service/core.py @@ -416,6 +416,7 @@ async def initialize(self) -> None: # Wire up sub-services self._fs_service.set_dependencies( viking_fs=self._viking_fs, + vikingdb=self._vikingdb_manager, privacy_config_service=self._privacy_config_service, resource_memory_link_service=self._resource_memory_link_service, ) diff --git a/openviking/service/fs_service.py b/openviking/service/fs_service.py index d1ad3af60c..fdb9a0cfa5 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -16,16 +16,23 @@ 
restore_skill_content, ) from openviking.server.identity import RequestContext +from openviking.session.memory.memory_updater import MemoryUpdater from openviking.storage.content_write import ContentWriteCoordinator +from openviking.storage.queuefs import SemanticMsg, get_queue_manager +from openviking.storage.queuefs.semantic_msg import build_semantic_coalesce_key from openviking.storage.viking_fs import VikingFS +from openviking.telemetry import get_current_telemetry +from openviking.telemetry.request_wait_tracker import get_request_wait_tracker +from openviking.telemetry.resource_summary import build_queue_status_payload from openviking.utils.embedding_utils import vectorize_directory_meta -from openviking_cli.exceptions import NotInitializedError +from openviking_cli.exceptions import DeadlineExceededError, NotInitializedError from openviking_cli.utils import VikingURI, get_logger logger = get_logger(__name__) if TYPE_CHECKING: from openviking.service.resource_memory_link_service import ResourceMemoryLinkService + from openviking.storage import VikingDBManager class FSService: @@ -34,21 +41,25 @@ class FSService: def __init__( self, viking_fs: Optional[VikingFS] = None, + vikingdb: Optional["VikingDBManager"] = None, privacy_config_service: Optional[UserPrivacyConfigService] = None, resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ): self._viking_fs = viking_fs + self._vikingdb = vikingdb self._privacy_config_service = privacy_config_service self._resource_memory_link_service = resource_memory_link_service def set_dependencies( self, viking_fs: VikingFS, + vikingdb: Optional["VikingDBManager"] = None, privacy_config_service: Optional[UserPrivacyConfigService] = None, resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ) -> None: """Set service dependencies (for deferred initialization).""" self._viking_fs = viking_fs + self._vikingdb = vikingdb self._privacy_config_service = privacy_config_service 
self._resource_memory_link_service = resource_memory_link_service @@ -165,23 +176,124 @@ def _resolve_directory_uris(uri: str) -> tuple[str, str]: return directory_uri, abstract_uri async def rm( - self, uri: str, ctx: RequestContext, recursive: bool = False + self, + uri: str, + ctx: RequestContext, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, ) -> Optional[Dict[str, Any]]: """Remove resource.""" uri = validate_viking_uri(uri) viking_fs = self._ensure_initialized() cleanup_result: Optional[Dict[str, Any]] = None - if self._resource_memory_link_service and context_type_for_uri(uri) == "resource": + context_type = context_type_for_uri(uri) + refresh_parent_uri = self._semantic_refresh_parent_uri(uri, context_type) + memory_overview_uri = self._memory_overview_parent_uri(uri, context_type) + if self._resource_memory_link_service and context_type == "resource": cleanup_result = await self._resource_memory_link_service.before_resource_delete( ctx=ctx, resource_uri=uri, recursive=recursive, ) result = await viking_fs.rm(uri, recursive=recursive, ctx=ctx) + queue_status = None + if refresh_parent_uri: + await self._enqueue_delete_refresh( + root_uri=refresh_parent_uri, + deleted_uri=uri, + context_type=context_type, + ctx=ctx, + ) + if wait: + queue_status = await self._wait_for_refresh(timeout=timeout) + if memory_overview_uri: + await MemoryUpdater.refresh_schema_overview( + viking_fs=viking_fs, + directory_uri=memory_overview_uri, + ctx=ctx, + ) if cleanup_result is not None and isinstance(result, dict): result["memory_cleanup"] = cleanup_result + if refresh_parent_uri and isinstance(result, dict): + result["semantic_root_uri"] = refresh_parent_uri + result["semantic_status"] = "complete" if wait else "queued" + if queue_status is not None: + result["queue_status"] = queue_status return result + @staticmethod + def _semantic_refresh_parent_uri(uri: str, context_type: str) -> Optional[str]: + if context_type != "resource": + 
return None + parent = VikingURI(uri).parent + return parent.uri if parent else None + + @staticmethod + def _memory_overview_parent_uri(uri: str, context_type: str) -> Optional[str]: + if context_type != "memory": + return None + leaf = uri.rstrip("/").rsplit("/", 1)[-1] + if leaf in {".abstract.md", ".overview.md", ".relations.json"}: + return None + parent = VikingURI(uri).parent + if parent is None: + return None + if not MemoryUpdater.memory_type_from_uri(parent.uri): + return None + return parent.uri + + async def _enqueue_delete_refresh( + self, + *, + root_uri: str, + deleted_uri: str, + context_type: str, + ctx: RequestContext, + ) -> None: + queue_manager = get_queue_manager() + semantic_queue = queue_manager.get_queue(queue_manager.SEMANTIC, allow_create=True) + telemetry_id = get_current_telemetry().telemetry_id + msg = SemanticMsg( + uri=root_uri, + context_type=context_type, + account_id=ctx.account_id, + user_id=ctx.user.user_id, + peer_id=ctx.user.user_id, + role=ctx.role.value, + skip_vectorization=False, + telemetry_id=telemetry_id, + coalesce_key=build_semantic_coalesce_key( + context_type=context_type, + uri=root_uri, + account_id=ctx.account_id, + user_id=ctx.user.user_id, + peer_id=ctx.user.user_id, + ), + changes={"deleted": [deleted_uri]}, + ) + if telemetry_id: + get_request_wait_tracker().register_semantic_root(telemetry_id, msg.id) + try: + await semantic_queue.enqueue(msg) + except Exception as exc: + if telemetry_id: + get_request_wait_tracker().mark_semantic_failed(telemetry_id, msg.id, str(exc)) + raise + + async def _wait_for_refresh(self, *, timeout: Optional[float]) -> Dict[str, Any]: + telemetry_id = get_current_telemetry().telemetry_id + if telemetry_id: + try: + await get_request_wait_tracker().wait_for_request(telemetry_id, timeout=timeout) + except TimeoutError as exc: + raise DeadlineExceededError("queue processing", timeout) from exc + return get_request_wait_tracker().build_queue_status(telemetry_id) + try: + return 
build_queue_status_payload(await get_queue_manager().wait_complete(timeout=timeout)) + except TimeoutError as exc: + raise DeadlineExceededError("queue processing", timeout) from exc + async def mv(self, from_uri: str, to_uri: str, ctx: RequestContext) -> None: """Move resource.""" from_uri = validate_viking_uri(from_uri, field_name="from_uri") @@ -317,7 +429,7 @@ async def write( """Write to an existing file and refresh semantics/vectors.""" uri = validate_viking_uri(uri) viking_fs = self._ensure_initialized() - coordinator = ContentWriteCoordinator(viking_fs=viking_fs) + coordinator = ContentWriteCoordinator(viking_fs=viking_fs, vikingdb=self._vikingdb) return await coordinator.write( uri=uri, content=content, diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 0350aecd7a..eaacb678dc 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -33,7 +33,7 @@ from openviking.telemetry.request_wait_tracker import get_request_wait_tracker from openviking.utils.time_utils import parse_iso_datetime from openviking_cli.exceptions import NotFoundError -from openviking_cli.utils import get_logger +from openviking_cli.utils import VikingURI, get_logger logger = get_logger(__name__) @@ -335,6 +335,70 @@ def _get_viking_fs(self): self._viking_fs = get_viking_fs() return self._viking_fs + @classmethod + async def refresh_schema_overview( + cls, + *, + viking_fs: Any, + directory_uri: str, + ctx: RequestContext, + ) -> None: + memory_type = cls.memory_type_from_uri(directory_uri) + if not memory_type: + return + try: + from openviking.session.memory.memory_type_registry import create_default_registry + + updater = cls(registry=create_default_registry()) + updater._viking_fs = viking_fs + await updater.generate_overview(memory_type, directory_uri, ctx) + except Exception: + logger.warning( + "Failed to refresh memory overview for %s", + directory_uri, + exc_info=True, + ) 
+ + @classmethod + async def refresh_file_embedding( + cls, + *, + viking_fs: Any, + vikingdb: Any, + uri: str, + memory_type: Optional[str], + ctx: RequestContext, + ) -> bool: + if not vikingdb or not bool(getattr(vikingdb, "has_queue_manager", False)): + return False + try: + from openviking.session.memory.memory_type_registry import create_default_registry + + result = MemoryUpdateResult() + result.add_written(uri) + updater = cls(registry=create_default_registry(), vikingdb=vikingdb) + updater._viking_fs = viking_fs + attempted = await updater._vectorize_memories( + result, + ctx, + uri_memory_type_map={uri: memory_type} if memory_type else {}, + ) + return attempted > 0 + except Exception: + logger.warning("Failed to refresh memory embedding for %s", uri, exc_info=True) + return False + + @staticmethod + def memory_type_from_uri(uri: str) -> Optional[str]: + parts = [part for part in VikingURI(uri).full_path.split("/") if part] + try: + memories_idx = parts.index("memories") + except ValueError: + return None + if len(parts) <= memories_idx + 1: + return None + return parts[memories_idx + 1] + @tracer() async def apply_operations( self, @@ -640,7 +704,7 @@ async def _vectorize_memories( ctx: RequestContext, extract_context: Any = None, uri_memory_type_map: Dict[str, str] = None, - ) -> None: + ) -> int: """Vectorize written and edited memory files. 
Args: @@ -651,11 +715,12 @@ async def _vectorize_memories( """ if not self._vikingdb: logger.debug("VikingDB not available, skipping vectorization") - return + return 0 uri_memory_type_map = uri_memory_type_map or {} viking_fs = self._get_viking_fs() request_wait_tracker = get_request_wait_tracker() + attempted_count = 0 # Collect all URIs to vectorize (skip .overview.md and .abstract.md - they are handled separately) # Also skip URIs that were deleted in the same batch @@ -669,7 +734,7 @@ async def _vectorize_memories( if not uris_to_vectorize: logger.debug("No memory files to vectorize") - return + return 0 for uri in uris_to_vectorize: try: @@ -736,7 +801,17 @@ async def _vectorize_memories( request_wait_tracker.register_embedding_root( embedding_msg.telemetry_id, embedding_msg.id ) - enqueued = await self._vikingdb.enqueue_embedding_msg(embedding_msg) + attempted_count += 1 + try: + enqueued = await self._vikingdb.enqueue_embedding_msg(embedding_msg) + except Exception as e: + if embedding_msg.telemetry_id: + request_wait_tracker.mark_embedding_failed( + embedding_msg.telemetry_id, + embedding_msg.id, + str(e), + ) + raise if not enqueued and embedding_msg.telemetry_id: request_wait_tracker.mark_embedding_failed( embedding_msg.telemetry_id, @@ -747,6 +822,7 @@ async def _vectorize_memories( except Exception as e: tracer.error(f"Failed to vectorize memory {uri}: {e}") + return attempted_count async def generate_overview( self, @@ -819,11 +895,18 @@ async def generate_overview( # Extract filename from path filename = file_path.split("/")[-1] + metadata = mf.to_metadata() + self._fill_overview_fallback_fields( + memory_type=memory_type, + directory=directory, + filename=filename, + metadata=metadata, + ) items.append( { "file_name": filename, - "file_content": mf.to_metadata(), + "file_content": metadata, } ) except Exception as e: @@ -854,3 +937,20 @@ async def generate_overview( await viking_fs.write_file(overview_path, rendered, ctx=ctx) except Exception as e: 
tracer.error(f"Failed to write overview {overview_path}: {e}") + + @staticmethod + def _fill_overview_fallback_fields( + *, + memory_type: str, + directory: str, + filename: str, + metadata: Dict[str, Any], + ) -> None: + stem = filename.removesuffix(".md") + parent_name = directory.rstrip("/").split("/")[-1] + if memory_type == "entities": + metadata.setdefault("category", parent_name) + metadata.setdefault("name", stem) + elif memory_type == "preferences": + metadata.setdefault("user", parent_name) + metadata.setdefault("topic", stem) diff --git a/openviking/storage/content_write.py b/openviking/storage/content_write.py index 5a9248e9d5..80070ebed1 100644 --- a/openviking/storage/content_write.py +++ b/openviking/storage/content_write.py @@ -12,6 +12,7 @@ from openviking.core.namespace import NamespaceShapeError, canonicalize_uri, context_type_for_uri from openviking.resource.watch_storage import is_watch_task_control_uri from openviking.server.identity import RequestContext +from openviking.session.memory.memory_updater import MemoryUpdater from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils from openviking.storage.queuefs import SemanticMsg, get_queue_manager from openviking.storage.queuefs.semantic_msg import build_semantic_coalesce_key @@ -48,8 +49,9 @@ class ContentWriteCoordinator: """Write a file (create or modify) and trigger downstream maintenance.""" - def __init__(self, viking_fs: VikingFS): + def __init__(self, viking_fs: VikingFS, vikingdb: Any = None): self._viking_fs = viking_fs + self._vikingdb = vikingdb async def write( self, @@ -122,12 +124,16 @@ def _build_write_result( written_bytes: int, wait: bool, queue_status: Optional[Dict[str, Any]], + semantic_status: Optional[str] = None, + vector_status: Optional[str] = None, + overview_status: Optional[str] = None, ) -> Dict[str, Any]: - semantic_status, vector_status = self._refresh_statuses( - wait=wait, - queue_status=queue_status, - ) - return { + if semantic_status is 
None or vector_status is None: + semantic_status, vector_status = self._refresh_statuses( + wait=wait, + queue_status=queue_status, + ) + result = { "uri": uri, "root_uri": root_uri, "context_type": context_type, @@ -138,6 +144,9 @@ def _build_write_result( "vector_status": vector_status, "queue_status": queue_status, } + if overview_status is not None: + result["overview_status"] = overview_status + return result def _refresh_statuses( self, @@ -596,43 +605,6 @@ async def _enqueue_semantic_refresh( get_request_wait_tracker().mark_semantic_failed(msg.telemetry_id, msg.id, str(e)) raise - async def _enqueue_memory_refresh( - self, - *, - root_uri: str, - modified_uri: str, - ctx: RequestContext, - ) -> None: - queue_manager = get_queue_manager() - semantic_queue = queue_manager.get_queue(queue_manager.SEMANTIC, allow_create=True) - telemetry = get_current_telemetry() - msg = SemanticMsg( - uri=root_uri, - context_type="memory", - account_id=ctx.account_id, - user_id=ctx.user.user_id, - peer_id=ctx.user.user_id, - role=ctx.role.value, - skip_vectorization=False, - telemetry_id=telemetry.telemetry_id, - coalesce_key=build_semantic_coalesce_key( - context_type="memory", - uri=root_uri, - account_id=ctx.account_id, - user_id=ctx.user.user_id, - peer_id=ctx.user.user_id, - ), - changes={"modified": [modified_uri]}, - ) - if msg.telemetry_id: - get_request_wait_tracker().register_semantic_root(msg.telemetry_id, msg.id) - try: - await semantic_queue.enqueue(msg) - except Exception as e: - if msg.telemetry_id: - get_request_wait_tracker().mark_semantic_failed(msg.telemetry_id, msg.id, str(e)) - raise - async def _wait_for_queues(self, *, timeout: Optional[float]) -> Dict[str, Any]: queue_manager = get_queue_manager() try: @@ -678,21 +650,37 @@ async def _write_memory_with_refresh( raise InvalidArgumentError(f"resource is busy and cannot be written now: {uri}") released = False + request_registered = False try: - if wait and telemetry_id: - 
get_request_wait_tracker().register_request(telemetry_id) await self._write_in_place(uri, content, mode=mode, ctx=ctx) - await self._enqueue_memory_refresh( - root_uri=root_uri, - modified_uri=uri, - ctx=ctx, - ) await lock_manager.release(handle) released = True - queue_status = ( - await self._wait_for_request(telemetry_id=telemetry_id, timeout=timeout) - if wait - else None + if wait and telemetry_id and self._vikingdb_has_queue(): + get_request_wait_tracker().register_request(telemetry_id) + request_registered = True + await MemoryUpdater.refresh_schema_overview( + viking_fs=self._viking_fs, + directory_uri=root_uri, + ctx=ctx, + ) + embedding_requested = await MemoryUpdater.refresh_file_embedding( + viking_fs=self._viking_fs, + vikingdb=self._vikingdb, + uri=uri, + memory_type=MemoryUpdater.memory_type_from_uri(root_uri), + ctx=ctx, + ) + queue_status = None + if embedding_requested and wait: + queue_status = ( + await self._wait_for_request(telemetry_id=telemetry_id, timeout=timeout) + if telemetry_id + else await self._wait_for_queues(timeout=timeout) + ) + vector_status = self._memory_vector_status( + embedding_requested=embedding_requested, + wait=wait, + queue_status=queue_status, ) return self._build_write_result( uri=uri, @@ -702,15 +690,37 @@ async def _write_memory_with_refresh( written_bytes=written_bytes, wait=wait, queue_status=queue_status, + semantic_status="skipped", + vector_status=vector_status, + overview_status="complete", ) except Exception: if not released: await lock_manager.release(handle) raise finally: - if wait and telemetry_id: + if request_registered: get_request_wait_tracker().cleanup(telemetry_id) + def _vikingdb_has_queue(self) -> bool: + if not self._vikingdb: + return False + return bool(getattr(self._vikingdb, "has_queue_manager", False)) + + def _memory_vector_status( + self, + *, + embedding_requested: bool, + wait: bool, + queue_status: Optional[Dict[str, Any]], + ) -> str: + if not embedding_requested: + return "skipped" + 
if not wait: + return "queued" + _, vector_status = self._refresh_statuses(wait=True, queue_status=queue_status) + return vector_status + async def _resolve_root_uri( self, uri: str, @@ -738,7 +748,10 @@ async def _resolve_root_uri( raise InvalidArgumentError( f"memory write target must be inside a memory type directory: {uri}" ) - root_uri = VikingURI.build(*parts[: memories_idx + 2]) + parent = VikingURI(uri).parent + if parent is None: + raise InvalidArgumentError(f"could not resolve write root for {uri}") + root_uri = parent.uri stat = await self._safe_stat(root_uri, ctx=ctx, allow_not_found=_allow_not_found) if stat.get("not_found") or not stat.get("isDir"): diff --git a/openviking/sync_client.py b/openviking/sync_client.py index bd048bea2f..56f72ad283 100644 --- a/openviking/sync_client.py +++ b/openviking/sync_client.py @@ -391,9 +391,15 @@ def relations(self, uri: str) -> List[Dict[str, Any]]: """Get relations""" return run_async(self._async_client.relations(uri)) - def rm(self, uri: str, recursive: bool = False) -> None: + def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: float = None, + ) -> None: """Delete resource""" - return run_async(self._async_client.rm(uri, recursive)) + return run_async(self._async_client.rm(uri, recursive, wait=wait, timeout=timeout)) def wait_processed(self, timeout: float = None) -> Dict[str, Any]: """Wait for all async operations to complete""" diff --git a/openviking_cli/client/base.py b/openviking_cli/client/base.py index 9dc3944125..4efd750ce4 100644 --- a/openviking_cli/client/base.py +++ b/openviking_cli/client/base.py @@ -112,7 +112,13 @@ async def mkdir(self, uri: str, description: Optional[str] = None) -> None: ... @abstractmethod - async def rm(self, uri: str, recursive: bool = False) -> None: + async def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource.""" ... 
diff --git a/openviking_cli/client/http.py b/openviking_cli/client/http.py index 3c4a60b711..f0c1c07426 100644 --- a/openviking_cli/client/http.py +++ b/openviking_cli/client/http.py @@ -596,13 +596,22 @@ async def mkdir(self, uri: str, description: Optional[str] = None) -> None: ) self._handle_response(response) - async def rm(self, uri: str, recursive: bool = False) -> None: + async def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource.""" uri = VikingURI.normalize(uri) + params = {"uri": uri, "recursive": recursive, "wait": wait} + if timeout is not None: + params["timeout"] = timeout response = await self._http.request( "DELETE", "/api/v1/fs", - params={"uri": uri, "recursive": recursive}, + params=params, ) self._handle_response(response) diff --git a/openviking_cli/client/sync_http.py b/openviking_cli/client/sync_http.py index 6dad05bb84..77c7d32b6a 100644 --- a/openviking_cli/client/sync_http.py +++ b/openviking_cli/client/sync_http.py @@ -400,9 +400,15 @@ def mkdir(self, uri: str, description: Optional[str] = None) -> None: """Create directory.""" run_async(self._async_client.mkdir(uri, description=description)) - def rm(self, uri: str, recursive: bool = False) -> None: + def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource.""" - run_async(self._async_client.rm(uri, recursive)) + run_async(self._async_client.rm(uri, recursive, wait=wait, timeout=timeout)) def mv(self, from_uri: str, to_uri: str) -> None: """Move resource.""" diff --git a/tests/server/conftest.py b/tests/server/conftest.py index 7208238e4c..18ad2c5de3 100644 --- a/tests/server/conftest.py +++ b/tests/server/conftest.py @@ -53,22 +53,22 @@ def _install_fake_embedder(monkeypatch): """Use an in-process fake embedder so server tests never hit external APIs.""" - dimension = 1024 class FakeEmbedder(DenseEmbedderBase): 
- def __init__(self): + def __init__(self, dimension: int = 2048): super().__init__(model_name="test-fake-embedder") + self._dimension = dimension def embed(self, text: str, is_query: bool = False) -> EmbedResult: - return EmbedResult(dense_vector=[0.1] * dimension) + return EmbedResult(dense_vector=[0.1] * self._dimension) def embed_batch(self, texts: list[str], is_query: bool = False) -> list[EmbedResult]: return [self.embed(text, is_query=is_query) for text in texts] def get_dimension(self) -> int: - return dimension + return self._dimension - monkeypatch.setattr(EmbeddingConfig, "get_embedder", lambda self: FakeEmbedder()) + monkeypatch.setattr(EmbeddingConfig, "get_embedder", lambda self: FakeEmbedder(self.dimension)) return FakeEmbedder diff --git a/tests/server/test_content_write_service.py b/tests/server/test_content_write_service.py index 406212d343..86945311a0 100644 --- a/tests/server/test_content_write_service.py +++ b/tests/server/test_content_write_service.py @@ -35,9 +35,14 @@ async def test_write_updates_memory_file_and_parent_overview(service): ) assert result["context_type"] == "memory" + assert result["semantic_status"] == "skipped" + assert result["vector_status"] == "complete" + assert result["overview_status"] == "complete" + assert result["queue_status"]["Embedding"]["processed"] >= 1 assert await service.viking_fs.read_file(memory_uri, ctx=ctx) == "Updated preference" assert await service.viking_fs.read_file(f"{memory_dir}/.overview.md", ctx=ctx) - assert await service.viking_fs.read_file(f"{memory_dir}/.abstract.md", ctx=ctx) + with pytest.raises(NotFoundError): + await service.viking_fs.read_file(f"{memory_dir}/.abstract.md", ctx=ctx) @pytest.mark.asyncio @@ -180,6 +185,56 @@ async def test_memory_write_ignores_resource_uri_in_inline_code(service): assert mf.links == [] +@pytest.mark.asyncio +async def test_memory_create_refreshes_nested_schema_overview(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_dir = 
f"viking://user/{ctx.user.user_space_name()}/memories/entities/动漫角色" + memory_uri = f"{memory_dir}/不二周助-link-test.md" + + result = await service.fs.write( + memory_uri, + content="用户保存了一张[不二周助](viking://resources/images/2026/06/10/不二周助_jpeg)的照片", + ctx=ctx, + mode="create", + wait=False, + ) + + overview = await service.viking_fs.read_file(f"{memory_dir}/.overview.md", ctx=ctx) + assert result["root_uri"] == memory_dir + assert "[不二周助-link-test](./不二周助-link-test.md)" in overview + + +@pytest.mark.asyncio +async def test_memory_rm_refreshes_nested_schema_overview(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_dir = f"viking://user/{ctx.user.user_space_name()}/memories/entities/动漫角色" + deleted_uri = f"{memory_dir}/不二周助-delete-test.md" + kept_uri = f"{memory_dir}/越前龙马-keep-test.md" + + await service.fs.write( + deleted_uri, + content="用户保存了一张不二周助的照片", + ctx=ctx, + mode="create", + ) + await service.fs.write( + kept_uri, + content="用户保存了一张越前龙马的照片", + ctx=ctx, + mode="create", + ) + + before = await service.viking_fs.read_file(f"{memory_dir}/.overview.md", ctx=ctx) + assert "[不二周助-delete-test](./不二周助-delete-test.md)" in before + assert "[越前龙马-keep-test](./越前龙马-keep-test.md)" in before + + await service.fs.rm(deleted_uri, ctx=ctx) + + after = await service.viking_fs.read_file(f"{memory_dir}/.overview.md", ctx=ctx) + assert "不二周助-delete-test" not in after + assert "[越前龙马-keep-test](./越前龙马-keep-test.md)" in after + + class _FakeHandle: def __init__(self, handle_id: str): self.id = handle_id @@ -438,7 +493,7 @@ async def _fail_enqueue(**kwargs): @pytest.mark.asyncio -async def test_memory_write_timeout_after_enqueue_releases_write_lock(monkeypatch): +async def test_memory_write_wait_skips_semantic_queue_and_releases_write_lock(monkeypatch): file_uri = "viking://user/default/memories/preferences/theme.md" root_uri = "viking://user/default/memories/preferences" ctx = RequestContext(user=UserIdentifier.the_default_user(), role=Role.USER) @@ 
-455,27 +510,33 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, content, mode, ctx return None - async def _fake_enqueue_memory_refresh(**kwargs): + async def _fail_wait_for_request(*, telemetry_id, timeout): + del telemetry_id, timeout + raise AssertionError("memory write should not wait for semantic refresh") + + async def _fake_refresh_schema_overview(**kwargs): del kwargs return None - async def _fake_wait_for_request(*, telemetry_id, timeout): - del telemetry_id - raise DeadlineExceededError("queue processing", timeout) - monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) - monkeypatch.setattr(coordinator, "_wait_for_request", _fake_wait_for_request) + monkeypatch.setattr(coordinator, "_wait_for_request", _fail_wait_for_request) + monkeypatch.setattr( + "openviking.storage.content_write.MemoryUpdater.refresh_schema_overview", + _fake_refresh_schema_overview, + ) - with pytest.raises(DeadlineExceededError): - await coordinator.write( - uri=file_uri, - content="updated", - ctx=ctx, - wait=True, - ) + result = await coordinator.write( + uri=file_uri, + content="updated", + ctx=ctx, + wait=True, + ) assert lock_manager.release_calls == ["lock-1"] + assert result["semantic_status"] == "skipped" + assert result["vector_status"] == "skipped" + assert result["overview_status"] == "complete" + assert result["queue_status"] is None # Create-mode test helpers @@ -546,16 +607,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): write_calls.append((uri, content)) return content - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, 
"_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( @@ -590,7 +646,7 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): write_calls.append((uri, content)) return content - async def _fake_enqueue_memory_refresh(**kwargs): + async def _fake_refresh_schema_overview(**kwargs): refresh_calls.append(kwargs) return None @@ -599,8 +655,11 @@ async def _fake_wait_for_queues(*, timeout): return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) + monkeypatch.setattr( + "openviking.storage.content_write.MemoryUpdater.refresh_schema_overview", + _fake_refresh_schema_overview, + ) result = await coordinator.write( uri=input_uri, content="new content", mode="create", ctx=ctx, wait=True @@ -610,8 +669,7 @@ async def _fake_wait_for_queues(*, timeout): assert result["root_uri"] == root_uri assert result["context_type"] == "memory" assert write_calls == [(canonical_uri, "new content")] - assert refresh_calls[0]["root_uri"] == root_uri - assert refresh_calls[0]["modified_uri"] == canonical_uri + assert refresh_calls[0]["directory_uri"] == root_uri @pytest.mark.asyncio @@ -626,16 +684,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, content, mode, ctx return None - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) with pytest.raises(AlreadyExistsError): @@ -654,16 +707,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, content, mode, ctx return None - async def 
_fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) with pytest.raises(InvalidArgumentError): @@ -688,16 +736,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): write_calls.append((uri, content)) return content - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( @@ -732,16 +775,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, mode, ctx return content - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( @@ -767,7 +805,7 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): refresh_calls = [] - async def _fake_enqueue_memory_refresh(**kwargs): + async def _fake_refresh_schema_overview(**kwargs): refresh_calls.append(kwargs) return None @@ -776,15 +814,17 @@ async def _fake_wait_for_queues(*, timeout): return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", 
_fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) + monkeypatch.setattr( + "openviking.storage.content_write.MemoryUpdater.refresh_schema_overview", + _fake_refresh_schema_overview, + ) result = await coordinator.write( uri=file_uri, content="content", mode="create", ctx=ctx, wait=True ) assert result["context_type"] == "memory" - assert refresh_calls[0]["root_uri"] == root_uri - assert refresh_calls[0]["modified_uri"] == file_uri + assert refresh_calls[0]["directory_uri"] == root_uri @pytest.mark.asyncio @@ -838,16 +878,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, content, ctx return None - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( @@ -874,16 +909,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, content, ctx return None - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( diff --git a/tests/server/test_request_wait_tracking.py b/tests/server/test_request_wait_tracking.py index 8eacc3ff7a..3309da21d4 100644 --- a/tests/server/test_request_wait_tracking.py +++ b/tests/server/test_request_wait_tracking.py @@ -25,7 +25,8 @@ def __init__(self, queue_status): def register_request(self, telemetry_id: str) -> None: 
self.registered_requests.append(telemetry_id) - async def wait_for_request(self, telemetry_id: str, timeout): + async def wait_for_request(self, telemetry_id: str, timeout, poll_interval=None): + del poll_interval self.wait_calls.append((telemetry_id, timeout)) def build_queue_status(self, telemetry_id: str): @@ -264,7 +265,7 @@ async def test_content_write_wait_uses_request_tracker(monkeypatch): ) lock_manager = SimpleNamespace( create_handle=lambda: SimpleNamespace(id="lock-1"), - acquire_tree=lambda handle, path: _return_true(handle, path), + acquire_exact_path=lambda handle, path: _return_true(handle, path), release=lambda handle: _return_none(handle), ) @@ -324,7 +325,7 @@ async def test_content_write_wait_uses_request_tracker_when_telemetry_disabled(m ) lock_manager = SimpleNamespace( create_handle=lambda: SimpleNamespace(id="lock-1"), - acquire_tree=lambda handle, path: _return_true(handle, path), + acquire_exact_path=lambda handle, path: _return_true(handle, path), release=lambda handle: _return_none(handle), ) From 0d78d1f582ae6e16c315f356f7e59f82460e1e21 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Thu, 11 Jun 2026 14:28:36 +0800 Subject: [PATCH 03/19] =?UTF-8?q?=E6=9B=B4=E6=96=B0docs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/en/api/02-resources.md | 4 +++- docs/en/api/03-filesystem.md | 2 ++ docs/zh/api/02-resources.md | 5 ++--- docs/zh/api/03-filesystem.md | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/en/api/02-resources.md b/docs/en/api/02-resources.md index 0c7457f402..de16b4f51a 100644 --- a/docs/en/api/02-resources.md +++ b/docs/en/api/02-resources.md @@ -146,7 +146,7 @@ This endpoint is the core entry point for resource management, supporting adding | to | string | No | - | Target Viking URI (exact location). Mutually exclusive with `parent` | | parent | string | No | - | Parent Viking URI (resource placed under this directory). 
Mutually exclusive with `to` | | create_parent | bool | No | False | Automatically create parent directory if it does not exist (server-side flag) | -| reason | string | No | "" | Reason for adding the resource (for documentation and relevance improvement, experimental feature) | +| reason | string | No | "" | Reason for adding the resource. When non-empty, OpenViking uses the reason and resource URI to generate or update user memory and record the resource reference in that memory | | instruction | string | No | "" | Processing instructions for semantic extraction (experimental feature) | | wait | bool | No | False | Whether to wait for semantic processing and vectorization to complete before returning | | timeout | float | No | None | Timeout in seconds, only effective when `wait=True` | @@ -165,6 +165,8 @@ This endpoint is the core entry point for resource management, supporting adding - Raw HTTP calls for local files require first uploading via [temp_upload](#temp_upload) to obtain `temp_file_id` - When `to` is specified and the target already exists, triggers incremental update - Only Git repository sources use full background import when `wait=false`; OpenViking performs repository preflight and target planning before returning the `task_id`. +- Memory generated from `reason` does not expand resource content. It only uses `reason`, the `viking://resources/...` URI, and the available resource name. OpenViking chooses an appropriate existing user memory type, such as `profile`, `entities`, `events`, or `preferences`; it does not force writes into a fixed memory type. +- When deleting `viking://resources/...`, OpenViking scans the current user's memories before deletion, removes the matching resource URI and content introduced by that `reason`, and refreshes the semantic index for the affected memories. - Other sources with `wait=false` finish source parsing, target resolution, and AGFS writes before returning. 
Only semantic and embedding queues continue asynchronously. - When `watch_interval > 0`, the watch task binds to `to` if provided; otherwise it binds to the `root_uri` returned by this import. If no stable `root_uri` is available, the request fails and asks for an explicit `to`. - For local directory inputs, scanning respects `.gitignore` files (root and nested) with standard Git semantics; `ignore_dirs`, `include`, and `exclude` further refine what is ingested. diff --git a/docs/en/api/03-filesystem.md b/docs/en/api/03-filesystem.md index e7b46fbcea..d9c812b90c 100644 --- a/docs/en/api/03-filesystem.md +++ b/docs/en/api/03-filesystem.md @@ -634,6 +634,8 @@ openviking rm viking://resources/old.md [--recursive] The `estimated_deleted_count` field (for recursive deletes) contains the estimated number of items (files and directories) deleted (from vector index). The CLI will display this information in output. +When deleting `viking://resources/...`, the response may include `memory_cleanup`, indicating that user memories referencing that resource URI were cleaned up before deletion. 
+ --- ### mv() diff --git a/docs/zh/api/02-resources.md b/docs/zh/api/02-resources.md index 54f7e4faf8..44df0bb22f 100644 --- a/docs/zh/api/02-resources.md +++ b/docs/zh/api/02-resources.md @@ -142,7 +142,7 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector | to | string | 否 | - | 目标 Viking URI(精确位置)。与 `parent` 互斥 | | parent | string | 否 | - | 父级 Viking URI(资源放入此目录下)。与 `to` 互斥 | | create_parent | bool | 否 | False | 如果父目录不存在,自动创建父目录(服务端标志) | -| reason | string | 否 | "" | 添加资源的原因。非空时会基于该原因和资源 URI 生成或更新用户记忆,并在记忆中记录对资源的引用 | +| reason | string | 否 | "" | 添加资源的原因(用于文档化和相关性提升,实验特性) | | instruction | string | 否 | "" | 语义提取的处理指令(实验特性) | | wait | bool | 否 | False | 是否等待语义处理和向量化完成才返回 | | timeout | float | 否 | None | 超时时间(秒),仅 `wait=true` 时生效 | @@ -159,8 +159,7 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector - `to` 和 `parent` 不能同时使用;如果使用 `parent` 且希望父目录不存在时自动创建,请传 `create_parent=true`。指定 `to` 且目标已存在时,触发增量更新。 - `path` 和 `temp_file_id` 不能同时指定,上传本地文件需要先通过 [temp_upload](#temp_upload) 上传获取 `temp_file_id`,在 SDK 和 CLI 中已经封装好。 - 只有 Git 仓库来源在 `wait=false` 时使用完整后台导入;OpenViking 会先完成仓库 preflight 和目标规划,再返回 `task_id`。 -- `reason` 触发的记忆生成不会读取或展开资源正文,只使用 `reason`、`viking://resources/...` URI 和可用的资源名称。系统会选择合适的既有用户记忆类型(如 `profile`、`entities`、`events`、`preferences`),不会强制写入固定记忆类型。 -- 资源文件本身不会写入额外注释或 metadata 文件。资源与记忆的关联只保存在记忆文件的 `MEMORY_FIELDS.resource_refs` / `links` 中。 +- `reason` 触发的记忆生成不会读取或展开资源正文,只使用 `reason`、`viking://resources/...` URI 和可用的资源名称。 - 删除 `viking://resources/...` 时,系统会在删除前扫描当前用户记忆中的 `resource_refs`,清理对应资源 URI 和由该 `reason` 引入的内容,并重新刷新相关记忆的语义索引。 - 其他来源在 `wait=false` 时会在响应前完成来源解析、目标解析和 AGFS 写入,仅 semantic 与 embedding 队列继续异步处理。 - `watch_interval > 0` 时,如果指定了 `to`,监控任务绑定该目标;如果未指定 `to`,监控任务绑定本次导入返回的 `root_uri`。如果无法得到稳定 `root_uri`,请求会报错并要求显式传 `to`。 diff --git a/docs/zh/api/03-filesystem.md b/docs/zh/api/03-filesystem.md index 1ec2724477..5b786aeff1 100644 --- a/docs/zh/api/03-filesystem.md +++ b/docs/zh/api/03-filesystem.md @@ -635,7 +635,7 @@ openviking rm 
viking://resources/old.md [--recursive] `estimated_deleted_count` 字段(递归删除时)包含删除的项目(文件和目录)估计数量(来自向量索引)。CLI 会在输出中显示此信息。 -删除 `viking://resources/...` 时,响应可能包含 `memory_cleanup`,表示删除前已清理引用该资源 URI 的用户记忆。关联只来自记忆文件的 `MEMORY_FIELDS.resource_refs` / `links`,资源文件本身不会保存 reason 注释或 sidecar metadata 文件。 +删除 `viking://resources/...` 时,响应可能包含 `memory_cleanup`,表示删除前已清理引用该资源 URI 的用户记忆。 --- From 45de78a18bc3b190d33055574b4dac4974d92440 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Thu, 11 Jun 2026 15:03:26 +0800 Subject: [PATCH 04/19] bug fix --- openviking/service/fs_service.py | 30 ++++++ tests/server/test_filesystem_router.py | 2 +- tests/service/test_fs_service.py | 130 +++++++++++++++++++++++++ 3 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 tests/service/test_fs_service.py diff --git a/openviking/service/fs_service.py b/openviking/service/fs_service.py index fdb9a0cfa5..799b21fc67 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -213,6 +213,12 @@ async def rm( directory_uri=memory_overview_uri, ctx=ctx, ) + for cleanup_overview_uri in self._memory_overview_parent_uris_from_cleanup(cleanup_result): + await MemoryUpdater.refresh_schema_overview( + viking_fs=viking_fs, + directory_uri=cleanup_overview_uri, + ctx=ctx, + ) if cleanup_result is not None and isinstance(result, dict): result["memory_cleanup"] = cleanup_result if refresh_parent_uri and isinstance(result, dict): @@ -243,6 +249,30 @@ def _memory_overview_parent_uri(uri: str, context_type: str) -> Optional[str]: return None return parent.uri + @classmethod + def _memory_overview_parent_uris_from_cleanup( + cls, + cleanup_result: Optional[Dict[str, Any]], + ) -> List[str]: + if not isinstance(cleanup_result, dict): + return [] + + overview_uris: List[str] = [] + for field in ("memory_uris", "deleted_memory_uris"): + values = cleanup_result.get(field) + if not isinstance(values, list): + continue + for memory_uri in values: + if not isinstance(memory_uri, 
str): + continue + overview_uri = cls._memory_overview_parent_uri( + memory_uri, + context_type_for_uri(memory_uri), + ) + if overview_uri: + overview_uris.append(overview_uri) + return list(dict.fromkeys(overview_uris)) + async def _enqueue_delete_refresh( self, *, diff --git a/tests/server/test_filesystem_router.py b/tests/server/test_filesystem_router.py index 8f16c8ff06..5d6c5a1ddc 100644 --- a/tests/server/test_filesystem_router.py +++ b/tests/server/test_filesystem_router.py @@ -15,7 +15,7 @@ async def test_rm_preserves_memory_cleanup(monkeypatch): cleanup = {"status": "success", "memory_uris": ["viking://user/alice/memories/entities/a.md"]} - async def fake_rm(uri, ctx=None, recursive=False): + async def fake_rm(uri, ctx=None, recursive=False, wait=False, timeout=None): return {"estimated_deleted_count": 1, "memory_cleanup": cleanup} monkeypatch.setattr( diff --git a/tests/service/test_fs_service.py b/tests/service/test_fs_service.py new file mode 100644 index 0000000000..f1e3393d48 --- /dev/null +++ b/tests/service/test_fs_service.py @@ -0,0 +1,130 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
+# SPDX-License-Identifier: AGPL-3.0 +"""Tests for file-system service coordination behavior.""" + +from unittest.mock import AsyncMock + +import pytest + +from openviking.server.identity import RequestContext, Role +from openviking.service.fs_service import FSService +from openviking_cli.session.user_id import UserIdentifier + + +class _FakeVikingFS: + def __init__(self): + self.rm_calls = [] + + async def rm(self, uri, recursive=False, ctx=None): + self.rm_calls.append({"uri": uri, "recursive": recursive, "ctx": ctx}) + return {"estimated_deleted_count": 3} + + +class _FakeResourceMemoryLinkService: + def __init__(self, result): + self.result = result + self.calls = [] + + async def before_resource_delete(self, *, ctx, resource_uri, recursive=False): + self.calls.append({"ctx": ctx, "resource_uri": resource_uri, "recursive": recursive}) + return self.result + + +@pytest.fixture +def request_context(): + return RequestContext( + user=UserIdentifier("default", "ryoma"), + role=Role.USER, + ) + + +@pytest.mark.asyncio +async def test_resource_rm_enqueues_parent_delete_refresh_and_waits(request_context): + viking_fs = _FakeVikingFS() + service = FSService(viking_fs=viking_fs) + service._enqueue_delete_refresh = AsyncMock() + service._wait_for_refresh = AsyncMock(return_value={"Semantic": {"pending_count": 0}}) + + uri = "viking://resources/images/2026/06/10/不二周助_jpeg" + result = await service.rm( + uri, + ctx=request_context, + recursive=True, + wait=True, + timeout=12.0, + ) + + assert viking_fs.rm_calls == [{"uri": uri, "recursive": True, "ctx": request_context}] + service._enqueue_delete_refresh.assert_awaited_once_with( + root_uri="viking://resources/images/2026/06/10", + deleted_uri=uri, + context_type="resource", + ctx=request_context, + ) + service._wait_for_refresh.assert_awaited_once_with(timeout=12.0) + assert result["semantic_root_uri"] == "viking://resources/images/2026/06/10" + assert result["semantic_status"] == "complete" + assert 
result["queue_status"] == {"Semantic": {"pending_count": 0}} + + +@pytest.mark.asyncio +async def test_resource_rm_without_wait_only_queues_refresh(request_context): + viking_fs = _FakeVikingFS() + service = FSService(viking_fs=viking_fs) + service._enqueue_delete_refresh = AsyncMock() + service._wait_for_refresh = AsyncMock() + + uri = "viking://resources/images/2026/06/10/不二周助_jpeg" + result = await service.rm(uri, ctx=request_context, recursive=True) + + service._enqueue_delete_refresh.assert_awaited_once() + service._wait_for_refresh.assert_not_awaited() + assert result["semantic_status"] == "queued" + + +@pytest.mark.asyncio +async def test_resource_rm_refreshes_memory_overview_for_cleaned_memories( + request_context, + monkeypatch, +): + cleanup = { + "status": "success", + "memory_uris": [ + "viking://user/ryoma/memories/entities/动漫角色/不二周助-write-test.md" + ], + "deleted_memory_uris": [ + "viking://user/ryoma/memories/entities/动漫角色/不二周助-link-test2.md" + ], + } + viking_fs = _FakeVikingFS() + link_service = _FakeResourceMemoryLinkService(cleanup) + service = FSService( + viking_fs=viking_fs, + resource_memory_link_service=link_service, + ) + service._enqueue_delete_refresh = AsyncMock() + + refreshed = [] + + async def fake_refresh_schema_overview(*, viking_fs, directory_uri, ctx): + refreshed.append({"viking_fs": viking_fs, "directory_uri": directory_uri, "ctx": ctx}) + + monkeypatch.setattr( + "openviking.service.fs_service.MemoryUpdater.refresh_schema_overview", + fake_refresh_schema_overview, + ) + + uri = "viking://resources/images/2026/06/11/不二周助_jpeg" + result = await service.rm(uri, ctx=request_context, recursive=True) + + assert link_service.calls == [ + {"ctx": request_context, "resource_uri": uri, "recursive": True} + ] + assert refreshed == [ + { + "viking_fs": viking_fs, + "directory_uri": "viking://user/ryoma/memories/entities/动漫角色", + "ctx": request_context, + } + ] + assert result["memory_cleanup"] == cleanup From 
09c353c4c167edac3ce9fec727b1cb1039088083 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Thu, 11 Jun 2026 18:13:51 +0800 Subject: [PATCH 05/19] =?UTF-8?q?=E6=9B=B4=E5=A5=BD=E7=9A=84=E5=88=A9?= =?UTF-8?q?=E7=94=A8=E6=97=B6=E9=97=B4=EF=BC=8C=E6=91=98=E8=A6=81=E4=BF=A1?= =?UTF-8?q?=E6=81=AF=E8=BF=9B=E8=A1=8Cmemory=E6=8F=90=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../processing/resource_linking.yaml | 26 +++- .../service/resource_memory_link_service.py | 125 +++++++++++++++- openviking/session/memory/memory_updater.py | 14 +- .../test_resource_memory_link_service.py | 138 ++++++++++++++++++ tests/session/memory/test_memory_updater.py | 39 +++++ 5 files changed, 329 insertions(+), 13 deletions(-) diff --git a/openviking/prompts/templates/processing/resource_linking.yaml b/openviking/prompts/templates/processing/resource_linking.yaml index c89da7a617..06f9b51651 100644 --- a/openviking/prompts/templates/processing/resource_linking.yaml +++ b/openviking/prompts/templates/processing/resource_linking.yaml @@ -28,11 +28,23 @@ variables: required: false default: "" + - name: "added_at" + type: "string" + description: "Resource addition time" + required: false + default: "" + + - name: "resource_abstract" + type: "string" + description: "Optional directory abstract near the resource" + required: false + default: "" + template: | You are a memory extraction agent for a resource-addition event. ## Objective - Create or update user memories using ONLY the user-provided reason and the resource URI. + Create or update user memories using ONLY the user-provided reason, the resource URI, the resource addition time, and the optional resource abstract. ## Target Output Language All memory content MUST be written in {{ output_language }}. 
@@ -40,18 +52,26 @@ template: | ## Resource Addition Resource URI: {{ resource_uri }} Source name: {{ source_name or "N/A" }} + Added at: {{ added_at or "N/A" }} + Resource abstract: {{ resource_abstract or "N/A" }} Reason: {{ reason }} ## Rules - Do NOT read, summarize, OCR, infer, or expand the resource file content. - - Treat the reason as the only semantic evidence. + - Treat the reason as the primary semantic evidence. + - Treat Added at as the time when the user added/uploaded this resource, not the time when the resource content was created, captured, signed, or happened. + - Treat Resource abstract as weak supporting context for a short resource descriptor only. Use it only when the descriptor adds non-redundant readability beyond the reason or existing memory; do not infer user facts from it unless the reason supports them. - Choose the most appropriate existing user memory type from the output schema, such as profile, entities, events, or preferences. - If the reason is not worth remembering, output no memory changes. - Create/edit visible memory as durable natural sentences preserving user intent/judgment; rewrite terse resource labels. - Example: reason "page 3 total should be 42" -> "User said page 3 total should be 42", not "stored report resource". + - When Source name alone is opaque, prefer one short neutral descriptor from Resource abstract if it helps readers understand what the resource is. Keep it under 12 Chinese characters or under 8 English words. + - Example: reason "这个项目是张三的项目", source "Apollo", abstract "Apollo is a config service..." -> "用户添加了 Apollo(配置服务项目),并说明它是张三的项目。" + - Omit the resource description if it merely repeats the subject, media type, or facts already stated by the reason or visible memory. + - Use the added date in visible memory only when it improves human readability or the memory would otherwise be temporally ambiguous. 
- When editing existing memory, merge with it; never replace it with only the newest resource or enumerate/count resources. - Use the Resource URI only as resource identity metadata. - - Do NOT include raw resource URIs, file paths, or generated links in visible memory content. + - Do NOT include raw resource URIs, file paths, generated links, or raw Resource abstract text in visible memory content. - Do NOT claim that you inspected the resource itself. - Return only memory operations that are grounded in the reason. diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py index a694033998..775aae8bb0 100644 --- a/openviking/service/resource_memory_link_service.py +++ b/openviking/service/resource_memory_link_service.py @@ -38,6 +38,11 @@ logger = get_logger(__name__) RESOURCE_REF_SOURCE = "add_resource.reason" +_RESOURCE_ABSTRACT_MAX_CHARS = 200 +_ABSTRACT_NOT_READY_MARKERS = ( + "[.abstract.md is not ready]", + "[Directory abstract is not ready]", +) @dataclass @@ -56,20 +61,27 @@ def __init__( resource_uri: str, reason: str, source_name: Optional[str], + added_at: Optional[str] = None, + resource_abstract: Optional[str] = None, **kwargs: Any, ): self.resource_uri = resource_uri self.reason = reason self.source_name = source_name or "" + self.added_at = added_at or "" + self.resource_abstract = resource_abstract or "" messages = [ Message( id="resource-linking", role="user", + created_at=self.added_at or None, parts=[ TextPart( text=( "Resource URI: " - f"{resource_uri}\nReason: {reason}\nSource name: {self.source_name}" + f"{resource_uri}\nReason: {reason}\nSource name: {self.source_name}\n" + f"Added at: {self.added_at or 'N/A'}\n" + f"Resource abstract: {self.resource_abstract or 'N/A'}" ) ) ], @@ -85,6 +97,8 @@ def instruction(self) -> str: "resource_uri": self.resource_uri, "reason": self.reason, "source_name": self.source_name, + "added_at": self.added_at, + "resource_abstract": 
self.resource_abstract, }, ) @@ -95,17 +109,31 @@ def _build_conversation_message(self) -> Dict[str, Any]: "## Resource Addition\n" f"Resource URI: {self.resource_uri}\n" f"Reason: {self.reason}\n" - f"Source name: {self.source_name or 'N/A'}\n\n" + f"Source name: {self.source_name or 'N/A'}\n" + f"Added at: {self.added_at or 'N/A'}\n" + f"Resource abstract: {self.resource_abstract or 'N/A'}\n\n" "Analyze only this resource addition record and output all memory " "write/edit/delete operations in a single JSON response." ), } def _build_prefetch_search_query(self) -> str: - return "\n".join(part for part in [self.reason, self.source_name] if part).strip() + return "\n".join( + part for part in [self.reason, self.source_name, self.resource_abstract] if part + ).strip() def get_conversation_text(self) -> str: - return f"{self.reason}\n{self.resource_uri}\n{self.source_name}".strip() + return "\n".join( + part + for part in [ + self.reason, + self.resource_uri, + self.source_name, + self.added_at, + self.resource_abstract, + ] + if part + ).strip() def _detect_language(self) -> str: from openviking.session.memory.utils import resolve_output_language @@ -225,10 +253,14 @@ async def on_resource_added( if not resource_uri: return {"status": "skipped", "reason": "empty_resource_uri"} + added_at = datetime.now(timezone.utc).isoformat() + resource_abstract = await self._read_resource_directory_abstract(resource_uri, ctx) provider = _ResourceLinkingProvider( resource_uri=resource_uri, reason=reason, source_name=source_name, + added_at=added_at, + resource_abstract=resource_abstract, ctx=ctx, viking_fs=self._get_viking_fs(), ) @@ -254,6 +286,7 @@ async def on_resource_added( resource_uri=resource_uri, reason=reason, ctx=ctx, + created_at=added_at, ) missing_uri = await self._memory_files_missing_resource_uri(changed_uris, resource_uri, ctx) return { @@ -300,6 +333,8 @@ async def before_resource_delete( ) cleaned.extend(cleanup_result.written_uris + cleanup_result.edited_uris) 
deleted.extend(cleanup_result.deleted_uris) + if memory_uri in cleanup_result.deleted_uris: + continue if not cleanup_result.has_changes(): await self._remove_resource_refs(memory_uri, resource_uri, ctx) cleaned.append(memory_uri) @@ -402,6 +437,8 @@ async def _cleanup_memory_reference( await self._remove_resource_refs(uri, resource_uri, ctx) if uri == memory_uri: await self._restore_cleanup_metadata(uri, memory_file, ctx) + if await self._delete_empty_cleanup_memory(uri, ctx): + self._mark_result_deleted(result, uri) return result async def _restore_cleanup_metadata( @@ -425,6 +462,35 @@ async def _restore_cleanup_metadata( mf.backlinks = [] await viking_fs.write_file(memory_uri, MemoryFileUtils.write(mf), ctx=ctx) + async def _delete_empty_cleanup_memory(self, memory_uri: str, ctx: RequestContext) -> bool: + """Delete memory files whose visible content was emptied by resource cleanup.""" + if context_type_for_uri(memory_uri) != "memory": + return False + viking_fs = self._get_viking_fs() + try: + raw = await viking_fs.read_file(memory_uri, ctx=ctx) + except (NotFoundError, FileNotFoundError): + return True + mf = MemoryFileUtils.read(raw, uri=memory_uri) + if (mf.content or "").strip(): + return False + directory_uri = memory_uri.rsplit("/", 1)[0] + await viking_fs.rm(memory_uri, recursive=False, ctx=ctx) + await MemoryUpdater.refresh_schema_overview( + viking_fs=viking_fs, + directory_uri=directory_uri, + ctx=ctx, + ) + logger.info("Deleted empty memory after resource cleanup: %s", memory_uri) + return True + + @staticmethod + def _mark_result_deleted(result: MemoryUpdateResult, uri: str) -> None: + result.written_uris = [item for item in result.written_uris if item != uri] + result.edited_uris = [item for item in result.edited_uris if item != uri] + if uri not in result.deleted_uris: + result.add_deleted(uri) + async def _append_resource_refs( self, *, @@ -432,9 +498,10 @@ async def _append_resource_refs( resource_uri: str, reason: str, ctx: RequestContext, + 
created_at: Optional[str] = None, ) -> None: viking_fs = self._get_viking_fs() - created_at = datetime.now(timezone.utc).isoformat() + created_at = created_at or datetime.now(timezone.utc).isoformat() for memory_uri in dict.fromkeys(memory_uris): if context_type_for_uri(memory_uri) != "memory": continue @@ -528,6 +595,54 @@ async def _find_referencing_memories( matches.append(_MemoryRefMatch(uri, mf, ref)) return matches + async def _read_resource_directory_abstract( + self, + resource_uri: str, + ctx: RequestContext, + ) -> str: + """Best-effort directory abstract lookup for resource-addition readability.""" + viking_fs = self._get_viking_fs() + for abstract_uri in self._resource_abstract_uri_candidates(resource_uri): + try: + abstract = await viking_fs.read_file(abstract_uri, ctx=ctx) + except Exception: + continue + abstract = self._clean_resource_abstract(abstract) + if abstract: + return abstract + return "" + + @classmethod + def _resource_abstract_uri_candidates(cls, resource_uri: str) -> List[str]: + normalized = (resource_uri or "").strip().rstrip("/") + if not normalized: + return [] + candidates = [f"{normalized}/.abstract.md"] + parent = cls._parent_uri(normalized) + if parent: + candidates.append(f"{parent}/.abstract.md") + return list(dict.fromkeys(candidates)) + + @staticmethod + def _parent_uri(uri: str) -> str: + scheme_index = uri.find("://") + min_slash_index = scheme_index + 3 if scheme_index >= 0 else 0 + slash_index = uri.rfind("/") + if slash_index <= min_slash_index: + return "" + return uri[:slash_index] + + @staticmethod + def _clean_resource_abstract(abstract: Any) -> str: + text = " ".join(str(abstract or "").split()) + if not text: + return "" + if any(text == marker or text.endswith(marker) for marker in _ABSTRACT_NOT_READY_MARKERS): + return "" + if len(text) > _RESOURCE_ABSTRACT_MAX_CHARS: + return text[: _RESOURCE_ABSTRACT_MAX_CHARS - 3].rstrip() + "..." 
+ return text + async def _memory_files_missing_resource_uri( self, memory_uris: Iterable[str], diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 5e4ed15f9a..068638d899 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -1045,15 +1045,19 @@ async def generate_overview( # If no memory files, delete the .overview.md and the directory if empty if not md_files: overview_path = f"{directory.rstrip('/')}/.overview.md" + can_delete_directory = all( + entry.get("name", "") in {"", ".overview.md"} for entry in entries + ) try: - await viking_fs.delete_file(overview_path, ctx=ctx) + await viking_fs.rm(overview_path, recursive=False, ctx=ctx) except Exception: pass # Try to delete empty directory - try: - await viking_fs.delete_file(directory, ctx=ctx) - except Exception: - pass + if can_delete_directory: + try: + await viking_fs.rm(directory, recursive=True, ctx=ctx) + except Exception: + pass return # Parse each file and collect items diff --git a/tests/service/test_resource_memory_link_service.py b/tests/service/test_resource_memory_link_service.py index e4dc481564..4fdfa50d4d 100644 --- a/tests/service/test_resource_memory_link_service.py +++ b/tests/service/test_resource_memory_link_service.py @@ -20,6 +20,7 @@ class _FakeVikingFS: def __init__(self, store): self.store = store + self.rm_calls = [] async def read_file(self, uri, ctx=None): return self.store[uri] @@ -27,6 +28,10 @@ async def read_file(self, uri, ctx=None): async def write_file(self, uri, content, ctx=None): self.store[uri] = content + async def rm(self, uri, recursive=False, ctx=None, lock_handle=None): + self.rm_calls.append((uri, recursive)) + self.store.pop(uri, None) + async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): prefix = uri.rstrip("/") + "/" return [ @@ -96,6 +101,8 @@ def test_resource_linking_provider_exposes_resource_uri_only_as_metadata(): 
resource_uri=resource_uri, reason="这是越前龙马的照片", source_name="yueqian.jpeg", + added_at="2026-06-11T08:00:00+00:00", + resource_abstract="动漫角色照片合集", ) message_text = "\n".join( @@ -110,6 +117,13 @@ def test_resource_linking_provider_exposes_resource_uri_only_as_metadata(): assert resource_uri in provider._build_conversation_message()["content"] assert resource_uri in provider.get_conversation_text() assert resource_uri in message_text + assert "2026-06-11T08:00:00+00:00" in instruction + assert "动漫角色照片合集" in instruction + assert ( + "Added at: 2026-06-11T08:00:00+00:00" + in provider._build_conversation_message()["content"] + ) + assert "Resource abstract: 动漫角色照片合集" in message_text assert "include the exact Resource URI in the visible memory content" not in instruction assert "Use the Resource URI only as resource identity metadata" in instruction assert "Do NOT include raw resource URIs" in instruction @@ -131,6 +145,61 @@ def test_resource_linking_prompt_prefers_natural_sentence_over_terse_label(): assert "merge with it" in instruction assert "only the newest resource" in instruction assert "enumerate/count resources" in instruction + assert "under 12 Chinese characters" in instruction + assert "under 8 English words" in instruction + assert "weak supporting context" in instruction + assert "short resource descriptor only" in instruction + assert "adds non-redundant readability" in instruction + assert "Source name alone is opaque" in instruction + assert "配置服务项目" in instruction + assert "merely repeats the subject, media type, or facts" in instruction + assert "角色照片" not in instruction + assert "身份证" not in instruction + + +@pytest.mark.asyncio +async def test_read_resource_directory_abstract_uses_parent_abstract(request_context): + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS({"viking://resources/images/.abstract.md": "动漫角色照片合集"}) + ) + + abstract = await service._read_resource_directory_abstract( + "viking://resources/images/yueqian.jpeg", + 
request_context, + ) + + assert abstract == "动漫角色照片合集" + + +@pytest.mark.asyncio +async def test_read_resource_directory_abstract_ignores_missing_or_not_ready( + request_context, +): + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS({})) + + missing = await service._read_resource_directory_abstract( + "viking://resources/images/yueqian.jpeg", + request_context, + ) + + assert missing == "" + + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS( + { + "viking://resources/images/.abstract.md": ( + "# viking://resources/images [Directory abstract is not ready]" + ) + } + ) + ) + + not_ready = await service._read_resource_directory_abstract( + "viking://resources/images/yueqian.jpeg", + request_context, + ) + + assert not_ready == "" @pytest.mark.asyncio @@ -346,6 +415,75 @@ async def fake_apply_memory_operations(**kwargs): assert mf.memory_type is None +@pytest.mark.asyncio +async def test_cleanup_memory_reference_deletes_empty_memory_shell( + request_context, + monkeypatch, +): + memory_uri = "viking://user/ryoma/memories/entities/动漫角色/越前龙马.md" + resource_uri = "viking://resources/images/2026/06/11/yueqian_jpeg" + original_raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=f"[用户保存了一张越前龙马的照片]({resource_uri})", + extra_fields={ + "category": "动漫角色", + "name": "越前龙马", + "user_id": "ryoma", + "memory_type": "entities", + }, + ) + ) + store = {memory_uri: original_raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + service._run_extract_loop = AsyncMock(return_value=(object(), object(), object())) + refresh_overview = AsyncMock() + monkeypatch.setattr( + "openviking.service.resource_memory_link_service.MemoryUpdater.refresh_schema_overview", + refresh_overview, + ) + + async def fake_apply_memory_operations(**kwargs): + store[memory_uri] = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content="", + memory_type="entities", + extra_fields={ + "category": "动漫角色", + "name": "越前龙马", + "user_id": 
"ryoma", + "memory_type": "entities", + "resource_refs": [ + { + "resource_uri": resource_uri, + "source": "add_resource.reason", + } + ], + }, + ) + ) + result = MemoryUpdateResult() + result.add_edited(memory_uri) + return result + + service._apply_memory_operations = AsyncMock(side_effect=fake_apply_memory_operations) + + result = await service._cleanup_memory_reference( + ctx=request_context, + memory_uri=memory_uri, + memory_file=MemoryFileUtils.read(original_raw, uri=memory_uri), + resource_uri=resource_uri, + reason="这是越前龙马的照片", + ) + + assert memory_uri not in store + assert service._get_viking_fs().rm_calls == [(memory_uri, False)] + assert result.edited_uris == [] + assert result.deleted_uris == [memory_uri] + refresh_overview.assert_awaited_once() + + @pytest.mark.asyncio async def test_assert_resource_unlinked_propagates_non_not_found_errors(request_context): service = ResourceMemoryLinkService(viking_fs=_ReadFailVikingFS()) diff --git a/tests/session/memory/test_memory_updater.py b/tests/session/memory/test_memory_updater.py index d7ad167eb2..a76187b40d 100644 --- a/tests/session/memory/test_memory_updater.py +++ b/tests/session/memory/test_memory_updater.py @@ -125,6 +125,45 @@ def test_set_registry(self): assert updater._registry == registry + @pytest.mark.asyncio + async def test_generate_overview_deletes_empty_overview_via_rm(self): + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + filename_template="{{ name }}.md", + fields=[], + overview_template="overview", + ) + registry = MagicMock() + registry.get.return_value = schema + + class FakeVikingFS: + def __init__(self): + self.rm_calls = [] + + async def ls(self, uri, show_all_hidden=False, ctx=None): + return [{"name": ".overview.md", "isDir": False}] + + async def rm(self, uri, recursive=False, ctx=None, lock_handle=None): + self.rm_calls.append((uri, recursive)) + + viking_fs = FakeVikingFS() + 
updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=viking_fs) + ctx = RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.generate_overview( + "entities", + "viking://user/alice/memories/entities/动漫角色", + ctx, + ) + + assert viking_fs.rm_calls == [ + ("viking://user/alice/memories/entities/动漫角色/.overview.md", False), + ("viking://user/alice/memories/entities/动漫角色", True), + ] + @pytest.mark.asyncio async def test_apply_operations_preserves_pre_resolved_multi_uris_for_new_page_ids(self): registry = MagicMock() From 4430b938f9d48a5807bb2d1e26120ed7614d5eaa Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Fri, 12 Jun 2026 00:44:22 +0800 Subject: [PATCH 06/19] =?UTF-8?q?=E5=90=88=E5=B9=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/ov_cli/src/base_client.rs | 5 +- crates/ov_cli/src/help_ui.rs | 4 + .../models/vlm/backends/volcengine_vlm.py | 28 +- .../memory/resource_link_memories.yaml | 24 + .../resource_link_memory_compaction.yaml | 88 +++ openviking/service/core.py | 1 + .../service/resource_link_memory_compactor.py | 662 ++++++++++++++++++ .../service/resource_memory_link_service.py | 23 + openviking/session/memory/memory_updater.py | 3 + .../test_resource_link_memory_compactor.py | 290 ++++++++ .../test_resource_memory_link_service.py | 60 ++ tests/session/memory/test_memory_updater.py | 37 + tests/unit/test_extra_headers_vlm.py | 46 ++ 13 files changed, 1261 insertions(+), 10 deletions(-) create mode 100644 openviking/prompts/templates/memory/resource_link_memories.yaml create mode 100644 openviking/prompts/templates/processing/resource_link_memory_compaction.yaml create mode 100644 openviking/service/resource_link_memory_compactor.py create mode 100644 tests/service/test_resource_link_memory_compactor.py diff --git a/crates/ov_cli/src/base_client.rs b/crates/ov_cli/src/base_client.rs index f8419447a7..15ebb9f6b4 100644 --- 
a/crates/ov_cli/src/base_client.rs +++ b/crates/ov_cli/src/base_client.rs @@ -145,6 +145,7 @@ pub struct BaseClient { pub(crate) api_key: Option, pub(crate) account: Option, pub(crate) user: Option, + timeout_secs: f64, pub(crate) profile_enabled: bool, pub(crate) extra_headers: Option>, } @@ -170,6 +171,7 @@ impl BaseClient { api_key, account, user, + timeout_secs, profile_enabled, extra_headers, } @@ -358,7 +360,8 @@ impl BaseClient { timeout: std::time::Duration, ) -> Result { let url = format!("{}{}", self.base_url, path); - let client = self.create_client_with_timeout(timeout)?; + let configured_timeout = std::time::Duration::from_secs_f64(self.timeout_secs); + let client = self.create_client_with_timeout(std::cmp::max(timeout, configured_timeout))?; let request = client.post(&url).headers(self.build_headers()).json(body); let request = if self.profile_enabled { diff --git a/crates/ov_cli/src/help_ui.rs b/crates/ov_cli/src/help_ui.rs index c246c9162d..8cda47a8a9 100644 --- a/crates/ov_cli/src/help_ui.rs +++ b/crates/ov_cli/src/help_ui.rs @@ -385,6 +385,10 @@ const COMMAND_HELP_SPECS: &[CommandHelpSpec] = &[ label: "--wait", description: "Wait until indexing/processing completes.", }, + HelpItem { + label: "--timeout ", + description: "Maximum wait time when using --wait.", + }, HelpItem { label: "--include / --exclude", description: "Filter files during folder import.", diff --git a/openviking/models/vlm/backends/volcengine_vlm.py b/openviking/models/vlm/backends/volcengine_vlm.py index f3f96b6fd4..cda87cd662 100644 --- a/openviking/models/vlm/backends/volcengine_vlm.py +++ b/openviking/models/vlm/backends/volcengine_vlm.py @@ -31,6 +31,16 @@ def __init__(self, config: Dict[str, Any]): if not self.model: self.model = "doubao-seed-2-0-pro-260215" + def _ark_client_kwargs(self) -> Dict[str, Any]: + return { + "api_key": self.api_key, + "base_url": self.api_base, + "timeout": self.timeout, + # OpenViking owns retry/backoff behavior. 
Disable SDK retries to + # keep request duration bounded by the configured timeout. + "max_retries": 0, + } + def _parse_tool_calls(self, message) -> List[ToolCall]: """Parse tool calls from VolcEngine response message.""" tool_calls = [] @@ -78,10 +88,7 @@ def get_client(self): raise ImportError( "Please install volcenginesdkarkruntime: pip install volcenginesdkarkruntime" ) - self._sync_client = volcenginesdkarkruntime.Ark( - api_key=self.api_key, - base_url=self.api_base, - ) + self._sync_client = volcenginesdkarkruntime.Ark(**self._ark_client_kwargs()) return self._sync_client def _build_async_client(self): @@ -92,9 +99,12 @@ def _build_async_client(self): raise ImportError( "Please install volcenginesdkarkruntime: pip install volcenginesdkarkruntime" ) - return volcenginesdkarkruntime.AsyncArk( - api_key=self.api_key, - base_url=self.api_base, + return volcenginesdkarkruntime.AsyncArk(**self._ark_client_kwargs()) + + async def _create_chat_completion_async(self, client: Any, kwargs: Dict[str, Any]) -> Any: + return await asyncio.wait_for( + client.chat.completions.create(**kwargs), + timeout=self.timeout, ) def get_completion( @@ -167,7 +177,7 @@ async def get_completion_async( for attempt in range(self.max_retries + 1): try: t0 = time.perf_counter() - response = await client.chat.completions.create(**kwargs) + response = await self._create_chat_completion_async(client, kwargs) elapsed = time.perf_counter() - t0 self._update_token_usage_from_response(response, duration_seconds=elapsed) result = self._build_vlm_response(response, has_tools=bool(tools)) @@ -379,7 +389,7 @@ async def get_vision_completion_async( client = self.get_async_client() t0 = time.perf_counter() - response = await client.chat.completions.create(**kwargs) + response = await self._create_chat_completion_async(client, kwargs) elapsed = time.perf_counter() - t0 self._update_token_usage_from_response(response, duration_seconds=elapsed) result = self._build_vlm_response(response, 
has_tools=bool(tools)) diff --git a/openviking/prompts/templates/memory/resource_link_memories.yaml b/openviking/prompts/templates/memory/resource_link_memories.yaml new file mode 100644 index 0000000000..ea29927cc4 --- /dev/null +++ b/openviking/prompts/templates/memory/resource_link_memories.yaml @@ -0,0 +1,24 @@ +memory_type: resource_link_memories +description: | + System-managed aggregate memories for resources added with user reasons. +directory: "viking://user/{{ user_space }}/memories/resource_link_memories" +filename_template: "{{ topic }}.md" +enabled: true +agent_only: true +embedding_template: |- + {{ topic }} + + {{ content }} + +fields: + - name: topic + type: string + description: | + Short topic title for a resource-link aggregate memory. + merge_op: immutable + + - name: content + type: string + description: | + Human-readable aggregate summary for resource additions, with a few representative resource links. + merge_op: patch diff --git a/openviking/prompts/templates/processing/resource_link_memory_compaction.yaml b/openviking/prompts/templates/processing/resource_link_memory_compaction.yaml new file mode 100644 index 0000000000..bf53867177 --- /dev/null +++ b/openviking/prompts/templates/processing/resource_link_memory_compaction.yaml @@ -0,0 +1,88 @@ +metadata: + id: "processing.resource_link_memory_compaction" + name: "Resource Link Memory Compaction" + description: "Compact many resource-link memories into a few readable aggregate memories" + version: "1.0.0" + language: "en" + category: "processing" + +variables: + - name: "output_language" + type: "string" + description: "Target language for memory content" + required: true + + - name: "target_memory_count" + type: "string" + description: "Maximum number of aggregate memories to return" + required: true + + - name: "max_resource_links" + type: "string" + description: "Maximum representative links per aggregate memory" + required: true + + - name: "user_id" + type: "string" + description: 
"The single OpenViking user that owns all input memories" + required: true + + - name: "aggregate_memories_json" + type: "string" + description: "Existing aggregate memories as JSON" + required: true + + - name: "resource_items_json" + type: "string" + description: "New single resource-link memories as JSON" + required: true + +template: | + You compact resource-addition memories. + + ## Objective + Rewrite the existing aggregate memories plus the new single resource memories into at most {{ target_memory_count }} readable aggregate memories. + For a broad collection with clear subtopics, prefer 2-3 aggregate memories instead of one oversized memory. + + ## Target Output Language + All titles and content MUST be written in {{ output_language }}. + + ## User Context + All input memories belong to one OpenViking user: {{ user_id }}. + Names mentioned in reasons/content are people, subjects, authors, photographers, or described actors from that user's resource descriptions. They are NOT OpenViking users unless the input explicitly says so. + + ## Existing Aggregate Memories + {{ aggregate_memories_json }} + + ## New Single Resource Memories + {{ resource_items_json }} + + ## Rules + - Preserve the user's intent, judgments, people, places, times, and topics when they are present. + - Do not say "multiple users", "N users", or "each user" unless the input explicitly refers to multiple OpenViking users. Prefer phrasing like "用户上传/保存了一组..." for this user's resources. + - Do not enumerate every resource. Group resources by meaningful topics such as person, place, time, scene, project, document, or user judgment. + - When many resources share a broad category but have distinct subthemes, split them into 2-3 concise topic memories. Example: city landmarks, natural landscapes, cultural/historic sites. + - If the resources are weakly related, create a concise "mixed resources" style topic instead of forcing false similarity. 
+ - Each aggregate memory should be useful to a human reader and should mention approximate coverage, themes, and representative examples. + - Include at most {{ max_resource_links }} markdown resource links per aggregate memory. + - Use only resource URIs provided in the input, and only as markdown links. + - Do not include raw JSON, raw metadata, or raw URI lists in visible content. + - Do not claim that you inspected resource file contents. + - Keep titles short and file-name friendly: at most 16 Chinese characters or 8 English words. Omit upload dates from titles unless needed to distinguish topics. + - Keep each content under 300 Chinese characters or 180 English words. + + ## Output JSON + Return ONLY a JSON object: + { + "memories": [ + { + "title": "short topic title, safe as a file name", + "content": "markdown content with a few representative links", + "resource_uris": ["viking://resources/..."], + "item_count": 12 + } + ] + } + +llm_config: + temperature: 0.0 diff --git a/openviking/service/core.py b/openviking/service/core.py index d44db32fc9..9e7ed58892 100644 --- a/openviking/service/core.py +++ b/openviking/service/core.py @@ -423,6 +423,7 @@ async def initialize(self) -> None: self._resource_memory_link_service.set_dependencies( vikingdb=self._vikingdb_manager, viking_fs=self._viking_fs, + queue_manager=self._queue_manager, ) self._relation_service.set_viking_fs(self._viking_fs) self._pack_service.set_dependencies( diff --git a/openviking/service/resource_link_memory_compactor.py b/openviking/service/resource_link_memory_compactor.py new file mode 100644 index 0000000000..95c683b54f --- /dev/null +++ b/openviking/service/resource_link_memory_compactor.py @@ -0,0 +1,662 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
+# SPDX-License-Identifier: AGPL-3.0 +"""Compact resource-linked memories created from add-resource reasons.""" + +from __future__ import annotations + +import hashlib +import json +import re +import threading +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional, Sequence + +from pydantic import BaseModel, Field + +from openviking.core.namespace import canonical_user_root, context_type_for_uri +from openviking.models.vlm.base import VLMResponse +from openviking.prompts.manager import render_prompt +from openviking.server.identity import RequestContext, Role +from openviking.session.memory.dataclass import MemoryFile +from openviking.session.memory.memory_updater import MemoryUpdater +from openviking.session.memory.utils import resolve_output_language +from openviking.session.memory.utils.json_parser import parse_json_with_stability +from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking.storage import VikingDBManager +from openviking.storage.queuefs.named_queue import DequeueHandlerBase +from openviking.storage.queuefs.queue_manager import QueueManager +from openviking.storage.transaction.lock_context import LockContext +from openviking.storage.transaction.lock_manager import get_lock_manager +from openviking.storage.viking_fs import VikingFS, get_viking_fs +from openviking_cli.exceptions import NotFoundError +from openviking_cli.session.user_id import UserIdentifier +from openviking_cli.utils import get_logger +from openviking_cli.utils.config import get_openviking_config + +logger = get_logger(__name__) + +RESOURCE_LINK_MEMORY_TYPE = "resource_link_memories" +RESOURCE_LINK_COMPACTION_QUEUE = "ResourceLinkCompaction" +RESOURCE_LINK_MANAGED_FIELD = "resource_link_managed" +RESOURCE_LINK_SOURCE_FIELD = "resource_link_source" +RESOURCE_LINK_CREATED_AT_FIELD = "resource_link_created_at" +RESOURCE_LINK_STATE_FIELD = "resource_link_state" 
+RESOURCE_LINK_COMPACTION_REF_SOURCE = "resource_link.compaction" + +_COMPACTION_THRESHOLD = 10 +_COMPACTION_BATCH_SIZE = 50 +_TARGET_AGGREGATE_MEMORY_COUNT = 3 +_MAX_REPRESENTATIVE_LINKS = 5 +_MAX_REASON_CHARS = 180 +_MAX_AGGREGATE_CONTENT_CHARS = 1200 +_MAX_VISIBLE_CONTENT_CHARS = 360 +_MAX_TITLE_CHARS = 24 +_RESOURCE_URI_RE = re.compile(r"viking://resources/[^\s<>\]\)\"']+") +_UNSAFE_FILENAME_CHARS_RE = re.compile(r"[\\/:\*\?\"<>\|\n\r\t]+") +_TITLE_DATE_PREFIX_RE = re.compile( + r"^(?:\d{4}年\d{1,2}月\d{1,2}日|\d{4}[-/]\d{1,2}[-/]\d{1,2})" +) +_TITLE_USER_ACTION_PREFIX_RE = re.compile(r"^(?:用户|该用户|当前用户)(?:上传|保存|添加|导入)的?") + + +@dataclass +class _CompactionCandidate: + uri: str + raw_hash: str + content: str + extra_fields: Dict[str, Any] + resource_refs: List[Dict[str, Any]] + resource_uris: List[str] + + @property + def item_count(self) -> int: + state = self.extra_fields.get(RESOURCE_LINK_STATE_FIELD) + if isinstance(state, dict): + try: + return max(1, int(state.get("item_count") or 0)) + except (TypeError, ValueError): + pass + return 1 + + +class _CompactedMemory(BaseModel): + title: str = "" + content: str = "" + resource_uris: List[str] = Field(default_factory=list) + item_count: int = 0 + + +class _CompactionResponse(BaseModel): + memories: List[_CompactedMemory] = Field(default_factory=list) + + +class ResourceLinkMemoryCompactor(DequeueHandlerBase): + """Batch compact system-managed resource-link memories.""" + + def __init__( + self, + *, + vikingdb: Optional[VikingDBManager] = None, + viking_fs: Optional[VikingFS] = None, + queue_manager: Optional[QueueManager] = None, + ): + self._vikingdb = vikingdb + self._viking_fs = viking_fs + self._queue_manager = queue_manager + self._coalesce_versions: Dict[str, int] = {} + self._coalesce_lock = threading.Lock() + if queue_manager: + self._ensure_queue() + + def set_dependencies( + self, + *, + vikingdb: Optional[VikingDBManager], + viking_fs: VikingFS, + queue_manager: Optional[QueueManager], + ) -> None: + 
self._vikingdb = vikingdb + self._viking_fs = viking_fs + self._queue_manager = queue_manager + if queue_manager: + self._ensure_queue() + + def _get_viking_fs(self) -> VikingFS: + return self._viking_fs or get_viking_fs() + + def _ensure_queue(self) -> None: + if not self._queue_manager: + return + self._queue_manager.get_queue( + RESOURCE_LINK_COMPACTION_QUEUE, + dequeue_handler=self, + allow_create=True, + ) + + async def mark_managed_memories( + self, + *, + ctx: RequestContext, + memory_uris: Sequence[str], + created_at: str, + ) -> List[str]: + """Mark newly-created add-resource memories as eligible for later compaction.""" + marked: List[str] = [] + viking_fs = self._get_viking_fs() + for memory_uri in dict.fromkeys(memory_uris): + if context_type_for_uri(memory_uri) != "memory": + continue + try: + raw = await viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(raw, uri=memory_uri) + except Exception as exc: + logger.warning("Failed to mark resource-linked memory %s: %s", memory_uri, exc) + continue + if mf.memory_type == RESOURCE_LINK_MEMORY_TYPE: + continue + mf.extra_fields[RESOURCE_LINK_MANAGED_FIELD] = True + mf.extra_fields[RESOURCE_LINK_SOURCE_FIELD] = "add_resource.reason" + mf.extra_fields.setdefault(RESOURCE_LINK_CREATED_AT_FIELD, created_at) + await viking_fs.write_file(memory_uri, MemoryFileUtils.write(mf), ctx=ctx) + marked.append(memory_uri) + return marked + + async def enqueue_check(self, *, ctx: RequestContext) -> Optional[str]: + """Enqueue a coalesced compaction check for the current user memory root.""" + if not self._queue_manager: + return None + self._ensure_queue() + key = self._coalesce_key(ctx) + with self._coalesce_lock: + version = self._coalesce_versions.get(key, 0) + 1 + self._coalesce_versions[key] = version + queue = self._queue_manager.get_queue( + RESOURCE_LINK_COMPACTION_QUEUE, + dequeue_handler=self, + allow_create=True, + ) + return await queue.enqueue( + { + "account_id": ctx.account_id, + "user_id": 
ctx.user.user_id, + "role": str(ctx.role.value if hasattr(ctx.role, "value") else ctx.role), + "coalesce_key": key, + "coalesce_version": version, + } + ) + + async def on_dequeue(self, data: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + data = self._unwrap_queue_data(data) + if not data: + self.report_success() + return data + + key = str(data.get("coalesce_key") or "") + version = self._safe_int(data.get("coalesce_version"), default=0) + if self._is_stale(key, version): + self.report_success() + return data + + try: + ctx = RequestContext( + user=UserIdentifier(str(data["account_id"]), str(data["user_id"])), + role=Role(str(data.get("role") or Role.USER.value)), + ) + result = await self.compact_if_needed(ctx=ctx) + data["result"] = result + self.report_success() + except Exception as exc: + logger.warning("Resource-link compaction failed: %s", exc, exc_info=True) + data["error"] = str(exc) + self.report_error(str(exc), data) + return data + + async def compact_if_needed(self, *, ctx: RequestContext) -> Dict[str, Any]: + aggregate_dir_uri = self.aggregate_dir_uri(ctx) + try: + lock_manager = get_lock_manager() + lock_path = self._lock_path(aggregate_dir_uri, ctx) + except Exception: + lock_manager = None + lock_path = "" + + if lock_manager and lock_path: + async with LockContext(lock_manager, [lock_path], lock_mode="exact"): + return await self._compact_once(ctx=ctx, aggregate_dir_uri=aggregate_dir_uri) + return await self._compact_once(ctx=ctx, aggregate_dir_uri=aggregate_dir_uri) + + @staticmethod + def aggregate_dir_uri(ctx: RequestContext) -> str: + return f"{canonical_user_root(ctx)}/memories/{RESOURCE_LINK_MEMORY_TYPE}" + + async def _compact_once( + self, + *, + ctx: RequestContext, + aggregate_dir_uri: str, + ) -> Dict[str, Any]: + singles, aggregates = await self._scan_candidates(ctx=ctx, aggregate_dir_uri=aggregate_dir_uri) + total_memory_count = len(singles) + len(aggregates) + should_compact = ( + len(singles) >= _COMPACTION_THRESHOLD + or 
(len(singles) > 0 and total_memory_count >= _COMPACTION_THRESHOLD) + or len(aggregates) > _TARGET_AGGREGATE_MEMORY_COUNT + ) + if not should_compact: + return { + "status": "skipped", + "reason": "below_threshold", + "single_count": len(singles), + "aggregate_count": len(aggregates), + "total_memory_count": total_memory_count, + } + + batch = singles[:_COMPACTION_BATCH_SIZE] + response = await self._generate_compaction( + ctx=ctx, + batch=batch, + aggregates=aggregates, + ) + if not response.memories: + return { + "status": "skipped", + "reason": "empty_compaction_output", + "single_count": len(singles), + "aggregate_count": len(aggregates), + "total_memory_count": total_memory_count, + } + + written_uris = await self._write_aggregate_memories( + ctx=ctx, + aggregate_dir_uri=aggregate_dir_uri, + response=response, + input_item_count=sum(item.item_count for item in batch + aggregates), + ) + if not written_uris: + return {"status": "skipped", "reason": "no_aggregate_written"} + + target_uris = set(written_uris) + deleted_uris = await self._delete_compacted_inputs( + ctx=ctx, + candidates=[*batch, *aggregates], + keep_uris=target_uris, + ) + await self._refresh_deleted_parent_overviews(ctx=ctx, deleted_uris=deleted_uris) + + remaining_singles = max(0, len(singles) - len(batch)) + if remaining_singles >= _COMPACTION_THRESHOLD: + await self.enqueue_check(ctx=ctx) + + return { + "status": "success", + "written_uris": written_uris, + "deleted_uris": deleted_uris, + "remaining_single_count": remaining_singles, + } + + async def _scan_candidates( + self, + *, + ctx: RequestContext, + aggregate_dir_uri: str, + ) -> tuple[List[_CompactionCandidate], List[_CompactionCandidate]]: + viking_fs = self._get_viking_fs() + memory_root = f"{canonical_user_root(ctx)}/memories" + try: + entries = await viking_fs.tree( + memory_root, + ctx=ctx, + node_limit=1000000, + level_limit=None, + ) + except Exception as exc: + logger.warning("Failed to scan memories for resource-link compaction: 
%s", exc) + return [], [] + + singles: List[_CompactionCandidate] = [] + aggregates: List[_CompactionCandidate] = [] + for entry in entries: + uri = str(entry.get("uri") or "") + if not uri or bool(entry.get("isDir") or entry.get("is_dir")): + continue + if not uri.endswith(".md") or self._is_hidden_memory_file(uri): + continue + try: + raw = await viking_fs.read_file(uri, ctx=ctx) + mf = MemoryFileUtils.read(raw, uri=uri) + except Exception: + continue + + refs = self._coerce_resource_refs(mf.extra_fields.get("resource_refs")) + resource_uris = self._resource_uris_from_memory(mf, refs) + candidate = _CompactionCandidate( + uri=uri, + raw_hash=self._hash_raw(raw), + content=mf.content or "", + extra_fields=dict(mf.extra_fields or {}), + resource_refs=refs, + resource_uris=resource_uris, + ) + if uri.startswith(aggregate_dir_uri.rstrip("/") + "/"): + aggregates.append(candidate) + elif mf.extra_fields.get(RESOURCE_LINK_MANAGED_FIELD) is True and refs: + singles.append(candidate) + + singles.sort(key=self._candidate_sort_key) + aggregates.sort(key=lambda item: item.uri) + return singles, aggregates + + async def _generate_compaction( + self, + *, + ctx: RequestContext, + batch: Sequence[_CompactionCandidate], + aggregates: Sequence[_CompactionCandidate], + ) -> _CompactionResponse: + prompt = render_prompt( + "processing.resource_link_memory_compaction", + { + "output_language": self._output_language(batch, aggregates), + "target_memory_count": str(_TARGET_AGGREGATE_MEMORY_COUNT), + "max_resource_links": str(_MAX_REPRESENTATIVE_LINKS), + "user_id": ctx.user.user_id, + "aggregate_memories_json": json.dumps( + [self._aggregate_prompt_item(item) for item in aggregates], + ensure_ascii=False, + indent=2, + ), + "resource_items_json": json.dumps( + [self._single_prompt_item(item) for item in batch], + ensure_ascii=False, + indent=2, + ), + }, + ) + content = await self._call_model(prompt) + parsed, error = parse_json_with_stability(content, _CompactionResponse) + if error 
or not parsed: + logger.warning("Failed to parse resource-link compaction output: %s", error) + return _CompactionResponse() + return parsed + + async def _call_model(self, prompt: str) -> str: + config = get_openviking_config() + vlm = config.vlm.get_vlm_instance() + response = await vlm.get_completion_async(prompt) + if isinstance(response, VLMResponse): + return response.content or "" + return str(response or "") + + async def _write_aggregate_memories( + self, + *, + ctx: RequestContext, + aggregate_dir_uri: str, + response: _CompactionResponse, + input_item_count: int, + ) -> List[str]: + now = datetime.now(timezone.utc).isoformat() + used_names: set[str] = set() + written_uris: List[str] = [] + viking_fs = self._get_viking_fs() + for index, memory in enumerate(response.memories[:_TARGET_AGGREGATE_MEMORY_COUNT], start=1): + title = self._clean_title(memory.title, index) + filename = self._unique_filename(title, used_names) + uri = f"{aggregate_dir_uri.rstrip('/')}/{filename}" + resource_uris = self._valid_resource_uris( + [*memory.resource_uris, *self._extract_resource_uris(memory.content)] + )[:_MAX_REPRESENTATIVE_LINKS] + content = self._truncate_text(memory.content.strip(), _MAX_VISIBLE_CONTENT_CHARS) + if not content: + continue + item_count = self._memory_item_count(memory, input_item_count) + mf = MemoryFile( + uri=uri, + content=content, + memory_type=RESOURCE_LINK_MEMORY_TYPE, + extra_fields={ + "topic": title, + RESOURCE_LINK_STATE_FIELD: { + "item_count": item_count, + "updated_at": now, + "representative_resources": [ + {"resource_uri": resource_uri} for resource_uri in resource_uris + ], + }, + "resource_refs": [ + { + "resource_uri": resource_uri, + "source": RESOURCE_LINK_COMPACTION_REF_SOURCE, + "created_at": now, + "match_text": title, + } + for resource_uri in resource_uris + ], + }, + ) + await viking_fs.write_file(uri, MemoryFileUtils.write(mf), ctx=ctx) + await MemoryUpdater.refresh_file_embedding( + viking_fs=viking_fs, + 
vikingdb=self._vikingdb, + uri=uri, + memory_type=RESOURCE_LINK_MEMORY_TYPE, + ctx=ctx, + ) + written_uris.append(uri) + return written_uris + + async def _delete_compacted_inputs( + self, + *, + ctx: RequestContext, + candidates: Sequence[_CompactionCandidate], + keep_uris: set[str], + ) -> List[str]: + deleted: List[str] = [] + viking_fs = self._get_viking_fs() + for candidate in candidates: + if candidate.uri in keep_uris: + continue + try: + current_raw = await viking_fs.read_file(candidate.uri, ctx=ctx) + except (NotFoundError, FileNotFoundError, KeyError): + continue + except Exception: + continue + if self._hash_raw(current_raw) != candidate.raw_hash: + logger.info("Skip deleting changed resource-link memory: %s", candidate.uri) + continue + try: + await viking_fs.rm(candidate.uri, recursive=False, ctx=ctx) + deleted.append(candidate.uri) + except (NotFoundError, FileNotFoundError, KeyError): + continue + return deleted + + async def _refresh_deleted_parent_overviews( + self, + *, + ctx: RequestContext, + deleted_uris: Sequence[str], + ) -> None: + viking_fs = self._get_viking_fs() + parent_dirs = { + uri.rsplit("/", 1)[0] + for uri in deleted_uris + if context_type_for_uri(uri) == "memory" and "/" in uri + } + for directory_uri in sorted(parent_dirs): + await MemoryUpdater.refresh_schema_overview( + viking_fs=viking_fs, + directory_uri=directory_uri, + ctx=ctx, + ) + + def _lock_path(self, aggregate_dir_uri: str, ctx: RequestContext) -> str: + viking_fs = self._get_viking_fs() + if hasattr(viking_fs, "_uri_to_path"): + return viking_fs._uri_to_path(aggregate_dir_uri, ctx=ctx) + return aggregate_dir_uri + + def _is_stale(self, key: str, version: int) -> bool: + if not key or version <= 0: + return False + with self._coalesce_lock: + return version < self._coalesce_versions.get(key, 0) + + @staticmethod + def _coalesce_key(ctx: RequestContext) -> str: + return f"{ctx.account_id}|{ctx.user.user_id}|{RESOURCE_LINK_MEMORY_TYPE}" + + @staticmethod + def 
_unwrap_queue_data(data: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: + if not isinstance(data, dict): + return data + payload = data.get("data") + if isinstance(payload, str): + try: + parsed = json.loads(payload) + if isinstance(parsed, dict): + return parsed + except Exception: + return data + if isinstance(payload, dict): + return payload + return data + + @staticmethod + def _hash_raw(raw: Any) -> str: + if isinstance(raw, bytes): + data = raw + else: + data = str(raw or "").encode("utf-8") + return hashlib.sha256(data).hexdigest() + + @staticmethod + def _is_hidden_memory_file(uri: str) -> bool: + leaf = uri.rsplit("/", 1)[-1] + return leaf.startswith(".") or uri.endswith("/.overview.md") or uri.endswith("/.abstract.md") + + @staticmethod + def _coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: + if isinstance(value, list): + return [dict(item) for item in value if isinstance(item, dict)] + if isinstance(value, dict): + return [dict(value)] + return [] + + @classmethod + def _resource_uris_from_memory( + cls, + mf: MemoryFile, + refs: Sequence[Dict[str, Any]], + ) -> List[str]: + uris = [str(ref.get("resource_uri") or "") for ref in refs] + uris.extend(cls._extract_resource_uris(mf.content or "")) + return cls._valid_resource_uris(uris) + + @staticmethod + def _valid_resource_uris(values: Sequence[str]) -> List[str]: + result: List[str] = [] + for value in values: + uri = str(value or "").strip().rstrip(".,;:!?,。;:!?") + if uri.startswith("viking://resources/") and uri not in result: + result.append(uri) + return result + + @staticmethod + def _extract_resource_uris(text: str) -> List[str]: + return _RESOURCE_URI_RE.findall(text or "") + + @classmethod + def _candidate_sort_key(cls, item: _CompactionCandidate) -> str: + values = [item.extra_fields.get(RESOURCE_LINK_CREATED_AT_FIELD)] + values.extend(ref.get("created_at") for ref in item.resource_refs) + for value in values: + if value: + return str(value) + return item.uri + + @classmethod + 
def _single_prompt_item(cls, item: _CompactionCandidate) -> Dict[str, Any]: + primary_ref = item.resource_refs[0] if item.resource_refs else {} + return { + "memory_uri": item.uri, + "content": cls._truncate_text(item.content, _MAX_REASON_CHARS * 2), + "reason": cls._truncate_text(str(primary_ref.get("reason") or ""), _MAX_REASON_CHARS), + "created_at": primary_ref.get("created_at") + or item.extra_fields.get(RESOURCE_LINK_CREATED_AT_FIELD) + or "", + "resource_uris": item.resource_uris[:_MAX_REPRESENTATIVE_LINKS], + "item_count": 1, + } + + @classmethod + def _aggregate_prompt_item(cls, item: _CompactionCandidate) -> Dict[str, Any]: + state = item.extra_fields.get(RESOURCE_LINK_STATE_FIELD) + return { + "memory_uri": item.uri, + "topic": item.extra_fields.get("topic") or item.uri.rsplit("/", 1)[-1].removesuffix(".md"), + "content": cls._truncate_text(item.content, _MAX_AGGREGATE_CONTENT_CHARS), + "resource_uris": item.resource_uris[:_MAX_REPRESENTATIVE_LINKS], + "item_count": item.item_count, + "state": state if isinstance(state, dict) else {}, + } + + @staticmethod + def _output_language( + batch: Sequence[_CompactionCandidate], + aggregates: Sequence[_CompactionCandidate], + ) -> str: + sample = "\n".join( + item.content for item in [*list(batch[:5]), *list(aggregates[:3])] if item.content + ) + return resolve_output_language(sample) + + @classmethod + def _clean_title(cls, title: str, index: int) -> str: + title = " ".join(str(title or "").split()) + title = _UNSAFE_FILENAME_CHARS_RE.sub("-", title).strip(" .-_") + title = _TITLE_DATE_PREFIX_RE.sub("", title).strip(" .-_") + title = _TITLE_USER_ACTION_PREFIX_RE.sub("", title).strip(" .-_") + if not title: + title = f"资源集合{index}" + return title[:_MAX_TITLE_CHARS].strip(" .-_") or f"资源集合{index}" + + @staticmethod + def _unique_filename(title: str, used_names: set[str]) -> str: + base = title.removesuffix(".md") + filename = f"{base}.md" + suffix = 2 + while filename in used_names: + filename = 
f"{base}-{suffix}.md" + suffix += 1 + used_names.add(filename) + return filename + + @staticmethod + def _memory_item_count(memory: _CompactedMemory, fallback_total: int) -> int: + try: + count = int(memory.item_count) + except (TypeError, ValueError): + count = 0 + if count > 0: + return count + if fallback_total > 0 and len(memory.resource_uris) <= 1: + return fallback_total + return max(1, len(memory.resource_uris)) + + @staticmethod + def _truncate_text(text: Any, max_chars: int) -> str: + value = " ".join(str(text or "").split()) + if len(value) <= max_chars: + return value + return value[: max_chars - 3].rstrip() + "..." + + @staticmethod + def _safe_int(value: Any, *, default: int) -> int: + try: + return int(value) + except (TypeError, ValueError): + return default diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py index 775aae8bb0..bf73afeff8 100644 --- a/openviking/service/resource_memory_link_service.py +++ b/openviking/service/resource_memory_link_service.py @@ -18,6 +18,7 @@ from openviking.message.part import TextPart from openviking.prompts.manager import render_prompt from openviking.server.identity import RequestContext +from openviking.service.resource_link_memory_compactor import ResourceLinkMemoryCompactor from openviking.session.memory.dataclass import MemoryFile, ResolvedOperations from openviking.session.memory.extract_loop import ExtractLoop from openviking.session.memory.memory_isolation_handler import MemoryIsolationHandler @@ -30,6 +31,7 @@ from openviking.session.memory.utils.link_renderer import LinkRenderer from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils from openviking.storage import VikingDBManager +from openviking.storage.queuefs.queue_manager import QueueManager from openviking.storage.viking_fs import VikingFS, get_viking_fs from openviking_cli.exceptions import NotFoundError from openviking_cli.utils import VikingURI, get_logger @@ 
-222,18 +224,31 @@ def __init__( *, vikingdb: Optional[VikingDBManager] = None, viking_fs: Optional[VikingFS] = None, + queue_manager: Optional[QueueManager] = None, + compactor: Optional[ResourceLinkMemoryCompactor] = None, ): self._vikingdb = vikingdb self._viking_fs = viking_fs + self._compactor = compactor or ResourceLinkMemoryCompactor( + vikingdb=vikingdb, + viking_fs=viking_fs, + queue_manager=queue_manager, + ) def set_dependencies( self, *, vikingdb: Optional[VikingDBManager], viking_fs: VikingFS, + queue_manager: Optional[QueueManager] = None, ) -> None: self._vikingdb = vikingdb self._viking_fs = viking_fs + self._compactor.set_dependencies( + vikingdb=vikingdb, + viking_fs=viking_fs, + queue_manager=queue_manager, + ) def _get_viking_fs(self) -> VikingFS: return self._viking_fs or get_viking_fs() @@ -288,10 +303,18 @@ async def on_resource_added( ctx=ctx, created_at=added_at, ) + managed_uris = await self._compactor.mark_managed_memories( + ctx=ctx, + memory_uris=result.written_uris, + created_at=added_at, + ) + compaction_msg_id = await self._compactor.enqueue_check(ctx=ctx) missing_uri = await self._memory_files_missing_resource_uri(changed_uris, resource_uri, ctx) return { "status": "success" if not result.errors else "partial_success", "memory_uris": changed_uris, + "managed_memory_uris": managed_uris, + "compaction_msg_id": compaction_msg_id, "deleted_memory_uris": result.deleted_uris, "errors": [f"{uri}: {exc}" for uri, exc in result.errors], "missing_resource_uri_uris": missing_uri, diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 068638d899..36d06da444 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -1038,6 +1038,9 @@ async def generate_overview( ): md_files.append(f"{base_uri}/{name}") + except (NotFoundError, FileNotFoundError): + logger.debug("Skip overview generation for deleted directory: %s", directory) + return except 
Exception as e: tracer.error(f"Failed to list files in {directory}: {e}") return diff --git a/tests/service/test_resource_link_memory_compactor.py b/tests/service/test_resource_link_memory_compactor.py new file mode 100644 index 0000000000..a4ffa5d81d --- /dev/null +++ b/tests/service/test_resource_link_memory_compactor.py @@ -0,0 +1,290 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""Tests for resource-link memory compaction.""" + +from unittest.mock import AsyncMock + +import pytest + +from openviking.server.identity import RequestContext, Role +from openviking.service.resource_link_memory_compactor import ( + RESOURCE_LINK_MANAGED_FIELD, + RESOURCE_LINK_MEMORY_TYPE, + ResourceLinkMemoryCompactor, + _CompactedMemory, + _CompactionResponse, +) +from openviking.session.memory.dataclass import MemoryFile +from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking_cli.session.user_id import UserIdentifier + + +class _FakeVikingFS: + def __init__(self, store): + self.store = store + self.rm_calls = [] + + async def read_file(self, uri, ctx=None): + return self.store[uri] + + async def write_file(self, uri, content, ctx=None): + self.store[uri] = content + + async def rm(self, uri, recursive=False, ctx=None, lock_handle=None): + self.rm_calls.append((uri, recursive)) + self.store.pop(uri, None) + + async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): + prefix = uri.rstrip("/") + "/" + return [ + { + "uri": item_uri, + "rel_path": item_uri.removeprefix(prefix), + "isDir": False, + } + for item_uri in list(self.store) + if item_uri.startswith(prefix) + ] + + +@pytest.fixture +def request_context(): + return RequestContext( + user=UserIdentifier("acct", "ryoma"), + role=Role.USER, + ) + + +def _managed_memory(uri: str, resource_uri: str, index: int) -> str: + return MemoryFileUtils.write( + MemoryFile( + uri=uri, + content=f"用户上传了一张角色{index}的照片。", + 
memory_type="entities", + extra_fields={ + "category": "动漫角色", + "name": f"角色{index}", + RESOURCE_LINK_MANAGED_FIELD: True, + "resource_refs": [ + { + "resource_uri": resource_uri, + "reason": f"这是角色{index}的照片", + "source": "add_resource.reason", + "created_at": f"2026-06-11T00:00:{index:02d}+00:00", + } + ], + }, + ) + ) + + +def _aggregate_memory(uri: str, resource_uri: str, item_count: int = 10) -> str: + return MemoryFileUtils.write( + MemoryFile( + uri=uri, + content="用户保存过一组全球地标风景照片。", + memory_type=RESOURCE_LINK_MEMORY_TYPE, + extra_fields={ + "topic": "全球地标风景照片", + "resource_link_state": {"item_count": item_count}, + "resource_refs": [ + { + "resource_uri": resource_uri, + "source": "resource_link.compaction", + } + ], + }, + ) + ) + + +@pytest.mark.asyncio +async def test_compact_if_needed_writes_aggregate_and_deletes_managed_inputs( + request_context, + monkeypatch, +): + store = {} + for index in range(10): + memory_uri = f"viking://user/ryoma/memories/entities/动漫角色/角色{index}.md" + resource_uri = f"viking://resources/images/2026/06/11/role_{index}" + store[memory_uri] = _managed_memory(memory_uri, resource_uri, index) + + fake_fs = _FakeVikingFS(store) + compactor = ResourceLinkMemoryCompactor(viking_fs=fake_fs) + compactor._call_model = AsyncMock( + return_value=( + '{"memories":[{"title":"动漫角色照片",' + '"content":"用户上传过一组动漫角色照片,代表资源包括' + '[角色0](viking://resources/images/2026/06/11/role_0)。",' + '"resource_uris":["viking://resources/images/2026/06/11/role_0"],' + '"item_count":10}]}' + ) + ) + refresh_embedding = AsyncMock(return_value=True) + refresh_overview = AsyncMock() + monkeypatch.setattr( + "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_file_embedding", + refresh_embedding, + ) + monkeypatch.setattr( + "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_schema_overview", + refresh_overview, + ) + + result = await compactor.compact_if_needed(ctx=request_context) + + aggregate_uri = 
"viking://user/ryoma/memories/resource_link_memories/动漫角色照片.md" + assert result["status"] == "success" + assert result["written_uris"] == [aggregate_uri] + assert len(result["deleted_uris"]) == 10 + assert aggregate_uri in store + assert all("memories/entities/动漫角色/角色" not in uri for uri in store) + + aggregate = MemoryFileUtils.read(store[aggregate_uri], uri=aggregate_uri) + assert aggregate.memory_type == RESOURCE_LINK_MEMORY_TYPE + assert aggregate.extra_fields["topic"] == "动漫角色照片" + assert aggregate.extra_fields["resource_link_state"]["item_count"] == 10 + assert aggregate.extra_fields["resource_refs"][0]["resource_uri"].endswith("/role_0") + refresh_embedding.assert_awaited_once() + refresh_overview.assert_awaited() + + +@pytest.mark.asyncio +async def test_compact_if_needed_skips_below_threshold(request_context): + store = {} + for index in range(9): + memory_uri = f"viking://user/ryoma/memories/entities/动漫角色/角色{index}.md" + resource_uri = f"viking://resources/images/2026/06/11/role_{index}" + store[memory_uri] = _managed_memory(memory_uri, resource_uri, index) + + compactor = ResourceLinkMemoryCompactor(viking_fs=_FakeVikingFS(store)) + compactor._call_model = AsyncMock() + + result = await compactor.compact_if_needed(ctx=request_context) + + assert result == { + "status": "skipped", + "reason": "below_threshold", + "single_count": 9, + "aggregate_count": 0, + "total_memory_count": 9, + } + compactor._call_model.assert_not_called() + + +@pytest.mark.asyncio +async def test_compact_if_needed_counts_existing_aggregates_toward_threshold( + request_context, + monkeypatch, +): + store = {} + aggregate_uri = "viking://user/ryoma/memories/resource_link_memories/全球地标风景照片.md" + store[aggregate_uri] = _aggregate_memory( + aggregate_uri, + "viking://resources/images/2026/06/11/landmark_0", + item_count=10, + ) + for index in range(9): + memory_uri = f"viking://user/ryoma/memories/entities/照片资源/风景{index}.md" + resource_uri = 
f"viking://resources/images/2026/06/11/scene_{index}" + store[memory_uri] = _managed_memory(memory_uri, resource_uri, index) + + fake_fs = _FakeVikingFS(store) + compactor = ResourceLinkMemoryCompactor(viking_fs=fake_fs) + compactor._call_model = AsyncMock( + return_value=( + '{"memories":[{"title":"风景照片集合",' + '"content":"用户保存过一组风景照片,代表资源包括' + '[风景0](viking://resources/images/2026/06/11/scene_0)。",' + '"resource_uris":["viking://resources/images/2026/06/11/scene_0"],' + '"item_count":19}]}' + ) + ) + monkeypatch.setattr( + "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_file_embedding", + AsyncMock(return_value=True), + ) + monkeypatch.setattr( + "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_schema_overview", + AsyncMock(), + ) + + result = await compactor.compact_if_needed(ctx=request_context) + + assert result["status"] == "success" + assert aggregate_uri in result["deleted_uris"] + compactor._call_model.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_mark_managed_memories_marks_only_memory_files(request_context): + memory_uri = "viking://user/ryoma/memories/entities/动漫角色/越前龙马.md" + resource_uri = "viking://resources/images/2026/06/11/yueqian_jpeg" + store = { + memory_uri: MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=f"用户上传了一张[越前龙马]({resource_uri})的照片。", + memory_type="entities", + extra_fields={ + "category": "动漫角色", + "name": "越前龙马", + "resource_refs": [{"resource_uri": resource_uri}], + }, + ) + ) + } + compactor = ResourceLinkMemoryCompactor(viking_fs=_FakeVikingFS(store)) + + marked = await compactor.mark_managed_memories( + ctx=request_context, + memory_uris=[memory_uri, "viking://resources/images/2026/06/11/yueqian_jpeg"], + created_at="2026-06-11T00:00:00+00:00", + ) + + assert marked == [memory_uri] + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.extra_fields[RESOURCE_LINK_MANAGED_FIELD] is True + assert 
mf.extra_fields["resource_link_source"] == "add_resource.reason" + assert mf.extra_fields["resource_link_created_at"] == "2026-06-11T00:00:00+00:00" + + +def test_clean_title_removes_upload_date_and_user_prefix(): + title = ResourceLinkMemoryCompactor._clean_title( + "2026年6月11日用户上传的全球知名地标风景照片合集", + 1, + ) + + assert title == "全球知名地标风景照片合集" + assert len(title) <= 24 + + +@pytest.mark.asyncio +async def test_write_aggregate_memories_truncates_long_content(request_context, monkeypatch): + fake_fs = _FakeVikingFS({}) + compactor = ResourceLinkMemoryCompactor(viking_fs=fake_fs) + monkeypatch.setattr( + "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_file_embedding", + AsyncMock(return_value=True), + ) + long_content = "用户保存了一组风景照片。" + ("很长的补充信息" * 100) + + written = await compactor._write_aggregate_memories( + ctx=request_context, + aggregate_dir_uri="viking://user/ryoma/memories/resource_link_memories", + response=_CompactionResponse( + memories=[ + _CompactedMemory( + title="风景照片集合", + content=long_content, + resource_uris=["viking://resources/images/2026/06/11/scene_0"], + item_count=10, + ) + ] + ), + input_item_count=10, + ) + + mf = MemoryFileUtils.read(fake_fs.store[written[0]], uri=written[0]) + assert len(mf.content) <= 360 + assert mf.content.endswith("...") diff --git a/tests/service/test_resource_memory_link_service.py b/tests/service/test_resource_memory_link_service.py index 4fdfa50d4d..09856ae1cd 100644 --- a/tests/service/test_resource_memory_link_service.py +++ b/tests/service/test_resource_memory_link_service.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: AGPL-3.0 """Tests for resource-memory linking service.""" +from types import SimpleNamespace from unittest.mock import AsyncMock import pytest @@ -54,6 +55,20 @@ async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): return [{"uri": memory_uri, "rel_path": "entities/wang.md", "isDir": False}] +class _FakeCompactor: + def __init__(self): + self.marked = None + 
self.enqueued = False + + async def mark_managed_memories(self, **kwargs): + self.marked = kwargs + return list(kwargs["memory_uris"]) + + async def enqueue_check(self, **kwargs): + self.enqueued = True + return "msg-1" + + @pytest.fixture def request_context(): return RequestContext( @@ -85,6 +100,51 @@ async def test_append_resource_refs_stores_only_memory_metadata(request_context) assert resource_uri not in store +@pytest.mark.asyncio +async def test_on_resource_added_marks_new_memories_for_compaction(request_context): + memory_uri = "viking://user/alice/memories/entities/动漫角色/越前龙马.md" + resource_uri = "viking://resources/images/2026/06/11/yueqian_jpeg" + store = { + memory_uri: MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content="用户上传了一张越前龙马的照片。", + memory_type="entities", + extra_fields={"category": "动漫角色", "name": "越前龙马"}, + ) + ) + } + compactor = _FakeCompactor() + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS(store), + compactor=compactor, + ) + service._run_extract_loop = AsyncMock( + return_value=( + SimpleNamespace(upsert_operations=[object()], delete_file_contents=[], errors=[]), + object(), + object(), + ) + ) + update_result = MemoryUpdateResult() + update_result.add_written(memory_uri) + service._apply_memory_operations = AsyncMock(return_value=update_result) + + result = await service.on_resource_added( + ctx=request_context, + resource_uri=resource_uri, + reason="这是越前龙马的照片", + source_name="yueqian.jpeg", + ) + + assert result["status"] == "success" + assert result["managed_memory_uris"] == [memory_uri] + assert result["compaction_msg_id"] == "msg-1" + assert compactor.enqueued is True + assert compactor.marked["memory_uris"] == [memory_uri] + assert compactor.marked["created_at"] + + def test_resource_linking_provider_detects_language_from_reason_not_resource_uri(): provider = _ResourceLinkingProvider( resource_uri="viking://resources/images/2026/06/10/yueqian_jpeg", diff --git 
a/tests/session/memory/test_memory_updater.py b/tests/session/memory/test_memory_updater.py index a76187b40d..00fbaacf7c 100644 --- a/tests/session/memory/test_memory_updater.py +++ b/tests/session/memory/test_memory_updater.py @@ -35,6 +35,7 @@ MemoryFileUtils, parse_memory_file_with_fields, ) +from openviking_cli.exceptions import NotFoundError from openviking_cli.session.user_id import UserIdentifier @@ -164,6 +165,42 @@ async def rm(self, uri, recursive=False, ctx=None, lock_handle=None): ("viking://user/alice/memories/entities/动漫角色", True), ] + @pytest.mark.asyncio + async def test_generate_overview_skips_deleted_directory(self): + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + filename_template="{{ name }}.md", + fields=[], + overview_template="overview", + ) + registry = MagicMock() + registry.get.return_value = schema + + class FakeVikingFS: + def __init__(self): + self.rm_calls = [] + + async def ls(self, uri, show_all_hidden=False, ctx=None): + raise NotFoundError(uri, "directory") + + async def rm(self, uri, recursive=False, ctx=None, lock_handle=None): + self.rm_calls.append((uri, recursive)) + + viking_fs = FakeVikingFS() + updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=viking_fs) + ctx = RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.generate_overview( + "entities", + "viking://user/alice/memories/entities/动漫角色", + ctx, + ) + + assert viking_fs.rm_calls == [] + @pytest.mark.asyncio async def test_apply_operations_preserves_pre_resolved_multi_uris_for_new_page_ids(self): registry = MagicMock() diff --git a/tests/unit/test_extra_headers_vlm.py b/tests/unit/test_extra_headers_vlm.py index 1f0687df47..c7c12eaf06 100644 --- a/tests/unit/test_extra_headers_vlm.py +++ b/tests/unit/test_extra_headers_vlm.py @@ -316,6 +316,52 @@ def run_in_thread_loop(): assert result == 
[worker_loop_client] assert build_async_client.call_count == 2 + @patch("volcenginesdkarkruntime.Ark") + def test_volcengine_sync_client_applies_timeout_and_disables_sdk_retries( + self, + mock_ark_class, + ): + mock_ark_class.return_value = MagicMock() + + vlm = VolcEngineVLM( + { + "api_key": "sk-test", + "api_base": "https://ark.cn-beijing.volces.com/api/v3", + "timeout": 12.0, + "max_retries": 5, + } + ) + + _ = vlm.get_client() + + mock_ark_class.assert_called_once() + call_kwargs = mock_ark_class.call_args[1] + assert call_kwargs["timeout"] == 12.0 + assert call_kwargs["max_retries"] == 0 + + @patch("volcenginesdkarkruntime.AsyncArk") + def test_volcengine_async_client_applies_timeout_and_disables_sdk_retries( + self, + mock_async_ark_class, + ): + mock_async_ark_class.return_value = MagicMock() + + vlm = VolcEngineVLM( + { + "api_key": "sk-test", + "api_base": "https://ark.cn-beijing.volces.com/api/v3", + "timeout": 12.0, + "max_retries": 5, + } + ) + + _ = vlm._build_async_client() + + mock_async_ark_class.assert_called_once() + call_kwargs = mock_async_ark_class.call_args[1] + assert call_kwargs["timeout"] == 12.0 + assert call_kwargs["max_retries"] == 0 + @patch("openviking.models.vlm.backends.openai_vlm.openai.AzureOpenAI") def test_azure_sync_client_disables_sdk_retries(self, mock_azure_openai_class): mock_azure_openai_class.return_value = MagicMock() From 62663e14f042cfa853976b78f84a566eda2b8412 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Fri, 12 Jun 2026 01:44:56 +0800 Subject: [PATCH 07/19] =?UTF-8?q?=E9=80=9A=E8=BF=87session.commit=E5=B0=81?= =?UTF-8?q?=E8=A3=85=20--reason?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/en/api/02-resources.md | 7 +- docs/en/api/05-sessions.md | 1 + docs/zh/api/02-resources.md | 6 +- docs/zh/api/05-sessions.md | 1 + .../memory/resource_link_memories.yaml | 24 - .../resource_link_memory_compaction.yaml | 88 --- .../processing/resource_linking.yaml | 79 --- 
openviking/service/core.py | 2 +- .../service/resource_link_memory_compactor.py | 662 ------------------ .../service/resource_memory_link_service.py | 561 +++++---------- openviking/service/resource_service.py | 106 ++- openviking/session/memory/memory_updater.py | 29 + .../session_extract_context_provider.py | 35 +- openviking/session/memory/utils/__init__.py | 2 + openviking/session/memory/utils/language.py | 37 +- .../session/memory/utils/resource_refs.py | 229 ++++++ openviking/storage/content_write.py | 191 +---- .../test_resource_link_memory_compactor.py | 290 -------- .../test_resource_memory_link_service.py | 298 ++------ .../memory/test_memory_react_system_prompt.py | 40 ++ tests/session/memory/test_memory_updater.py | 142 +++- 21 files changed, 847 insertions(+), 1983 deletions(-) delete mode 100644 openviking/prompts/templates/memory/resource_link_memories.yaml delete mode 100644 openviking/prompts/templates/processing/resource_link_memory_compaction.yaml delete mode 100644 openviking/prompts/templates/processing/resource_linking.yaml delete mode 100644 openviking/service/resource_link_memory_compactor.py create mode 100644 openviking/session/memory/utils/resource_refs.py delete mode 100644 tests/service/test_resource_link_memory_compactor.py diff --git a/docs/en/api/02-resources.md b/docs/en/api/02-resources.md index de16b4f51a..cbc9bb71c8 100644 --- a/docs/en/api/02-resources.md +++ b/docs/en/api/02-resources.md @@ -126,7 +126,8 @@ This endpoint is the core entry point for resource management, supporting adding 3. Call the corresponding Parser to parse content 4. Build the directory tree and write to AGFS 5. Wait for semantic processing completion when `wait=true`; with `wait=false`, return a `task_id` for queue tracking -6. Set up scheduled update task if `watch_interval` is specified +6. If `reason` is non-empty, commit a short temporary session through the normal memory extraction pipeline so suitable user memories can reference the resource URI +7. 
Set up scheduled update task if `watch_interval` is specified **Code Entry Points**: - `openviking/client/local.py:LocalClient.add_resource` - SDK entry (embedded) @@ -146,7 +147,7 @@ This endpoint is the core entry point for resource management, supporting adding | to | string | No | - | Target Viking URI (exact location). Mutually exclusive with `parent` | | parent | string | No | - | Parent Viking URI (resource placed under this directory). Mutually exclusive with `to` | | create_parent | bool | No | False | Automatically create parent directory if it does not exist (server-side flag) | -| reason | string | No | "" | Reason for adding the resource. When non-empty, OpenViking uses the reason and resource URI to generate or update user memory and record the resource reference in that memory | +| reason | string | No | "" | Reason for adding the resource. When non-empty, OpenViking runs it through the normal session memory extraction pipeline with the resource URI and records resource references in the resulting memory | | instruction | string | No | "" | Processing instructions for semantic extraction (experimental feature) | | wait | bool | No | False | Whether to wait for semantic processing and vectorization to complete before returning | | timeout | float | No | None | Timeout in seconds, only effective when `wait=True` | @@ -165,7 +166,7 @@ This endpoint is the core entry point for resource management, supporting adding - Raw HTTP calls for local files require first uploading via [temp_upload](#temp_upload) to obtain `temp_file_id` - When `to` is specified and the target already exists, triggers incremental update - Only Git repository sources use full background import when `wait=false`; OpenViking performs repository preflight and target planning before returning the `task_id`. -- Memory generated from `reason` does not expand resource content. It only uses `reason`, the `viking://resources/...` URI, and the available resource name. 
OpenViking chooses an appropriate existing user memory type, such as `profile`, `entities`, `events`, or `preferences`; it does not force writes into a fixed memory type. +- Memory generated from `reason` is extracted through the same pipeline as `session.commit`. It uses `reason`, the `viking://resources/...` URI, available source name, and available directory abstract; it does not inspect or expand the full resource content. OpenViking writes to existing memory types such as `entities`, `events`, or `preferences`, not a dedicated resource memory directory. - When deleting `viking://resources/...`, OpenViking scans the current user's memories before deletion, removes the matching resource URI and content introduced by that `reason`, and refreshes the semantic index for the affected memories. - Other sources with `wait=false` finish source parsing, target resolution, and AGFS writes before returning. Only semantic and embedding queues continue asynchronously. - When `watch_interval > 0`, the watch task binds to `to` if provided; otherwise it binds to the `root_uri` returned by this import. If no stable `root_uri` is available, the request fails and asks for an explicit `to`. diff --git a/docs/en/api/05-sessions.md b/docs/en/api/05-sessions.md index 0a61588acd..782473d025 100644 --- a/docs/en/api/05-sessions.md +++ b/docs/en/api/05-sessions.md @@ -906,6 +906,7 @@ Commit a session. Message archiving (Phase 1) completes immediately. Summary gen - Rapid consecutive commits on the same session are accepted; each request gets its own `task_id`. - Background Phase 2 work is serialized by archive order: archive `N+1` waits until archive `N` writes `.done`. - If an earlier archive failed and left no `.done`, later commit requests fail with `FAILED_PRECONDITION` until that failure is resolved. 
+- If committed messages contain durable facts, judgments, preferences, or events that mention `viking://resources/...`, memory extraction preserves the resource as a markdown link and records it in `MEMORY_FIELDS.resource_refs`. **Code Entries:** - `openviking/session/session.py:Session.commit_async()` - Core implementation diff --git a/docs/zh/api/02-resources.md b/docs/zh/api/02-resources.md index 44df0bb22f..4bd9b819fb 100644 --- a/docs/zh/api/02-resources.md +++ b/docs/zh/api/02-resources.md @@ -121,7 +121,7 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector 3. 调用对应 Parser 解析内容 4. 构建目录树并写入 AGFS 5. `wait=true` 时等待语义处理完成;`wait=false` 时返回 `task_id` 用于队列跟踪 -6. 如果 `reason` 非空,基于 `reason` 和资源 URI 触发一次独立的 memory linking,生成或更新合适的用户记忆 +6. 如果 `reason` 非空,通过一次短临时 session 复用常规记忆抽取链路,让合适的用户记忆引用该资源 URI 7. 如指定 `--watch-interval`,设置定时更新任务 **代码入口**: @@ -142,7 +142,7 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector | to | string | 否 | - | 目标 Viking URI(精确位置)。与 `parent` 互斥 | | parent | string | 否 | - | 父级 Viking URI(资源放入此目录下)。与 `to` 互斥 | | create_parent | bool | 否 | False | 如果父目录不存在,自动创建父目录(服务端标志) | -| reason | string | 否 | "" | 添加资源的原因(用于文档化和相关性提升,实验特性) | +| reason | string | 否 | "" | 添加资源的原因;非空时会随资源 URI 进入常规 session 记忆抽取链路,并在生成的记忆中记录资源引用 | | instruction | string | 否 | "" | 语义提取的处理指令(实验特性) | | wait | bool | 否 | False | 是否等待语义处理和向量化完成才返回 | | timeout | float | 否 | None | 超时时间(秒),仅 `wait=true` 时生效 | @@ -159,7 +159,7 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector - `to` 和 `parent` 不能同时使用;如果使用 `parent` 且希望父目录不存在时自动创建,请传 `create_parent=true`。指定 `to` 且目标已存在时,触发增量更新。 - `path` 和 `temp_file_id` 不能同时指定,上传本地文件需要先通过 [temp_upload](#temp_upload) 上传获取 `temp_file_id`,在 SDK 和 CLI 中已经封装好。 - 只有 Git 仓库来源在 `wait=false` 时使用完整后台导入;OpenViking 会先完成仓库 preflight 和目标规划,再返回 `task_id`。 -- `reason` 触发的记忆生成不会读取或展开资源正文,只使用 `reason`、`viking://resources/...` URI 和可用的资源名称。 +- `reason` 触发的记忆生成复用 `session.commit` 的抽取链路,只使用 `reason`、`viking://resources/...` URI、可用的资源名称和目录摘要,不会读取或展开完整资源正文;系统会写入 
`entities`、`events`、`preferences` 等已有记忆类型,不创建独立的资源记忆目录。 - 删除 `viking://resources/...` 时,系统会在删除前扫描当前用户记忆中的 `resource_refs`,清理对应资源 URI 和由该 `reason` 引入的内容,并重新刷新相关记忆的语义索引。 - 其他来源在 `wait=false` 时会在响应前完成来源解析、目标解析和 AGFS 写入,仅 semantic 与 embedding 队列继续异步处理。 - `watch_interval > 0` 时,如果指定了 `to`,监控任务绑定该目标;如果未指定 `to`,监控任务绑定本次导入返回的 `root_uri`。如果无法得到稳定 `root_uri`,请求会报错并要求显式传 `to`。 diff --git a/docs/zh/api/05-sessions.md b/docs/zh/api/05-sessions.md index 5e531b7454..12b6f54d5e 100644 --- a/docs/zh/api/05-sessions.md +++ b/docs/zh/api/05-sessions.md @@ -906,6 +906,7 @@ await client.session_used( - 同一 session 的多次快速连续 commit 会被接受;每次请求都会拿到独立的 `task_id` - 后台 Phase 2 会按 archive 顺序串行推进:`archive_N+1` 会等待 `archive_N` 写出 `.done` 后再继续 - 如果更早的 archive 已失败且没有 `.done`,后续 commit 会直接返回错误,直到该失败被处理 +- 如果提交的消息中包含带 `viking://resources/...` 的长期事实、评价、偏好或事件,记忆抽取会把资源保留为 markdown 链接,并写入 `MEMORY_FIELDS.resource_refs` **代码入口**: - `openviking/session/session.py:Session.commit_async()` - 核心实现 diff --git a/openviking/prompts/templates/memory/resource_link_memories.yaml b/openviking/prompts/templates/memory/resource_link_memories.yaml deleted file mode 100644 index ea29927cc4..0000000000 --- a/openviking/prompts/templates/memory/resource_link_memories.yaml +++ /dev/null @@ -1,24 +0,0 @@ -memory_type: resource_link_memories -description: | - System-managed aggregate memories for resources added with user reasons. -directory: "viking://user/{{ user_space }}/memories/resource_link_memories" -filename_template: "{{ topic }}.md" -enabled: true -agent_only: true -embedding_template: |- - {{ topic }} - - {{ content }} - -fields: - - name: topic - type: string - description: | - Short topic title for a resource-link aggregate memory. - merge_op: immutable - - - name: content - type: string - description: | - Human-readable aggregate summary for resource additions, with a few representative resource links. 
- merge_op: patch diff --git a/openviking/prompts/templates/processing/resource_link_memory_compaction.yaml b/openviking/prompts/templates/processing/resource_link_memory_compaction.yaml deleted file mode 100644 index bf53867177..0000000000 --- a/openviking/prompts/templates/processing/resource_link_memory_compaction.yaml +++ /dev/null @@ -1,88 +0,0 @@ -metadata: - id: "processing.resource_link_memory_compaction" - name: "Resource Link Memory Compaction" - description: "Compact many resource-link memories into a few readable aggregate memories" - version: "1.0.0" - language: "en" - category: "processing" - -variables: - - name: "output_language" - type: "string" - description: "Target language for memory content" - required: true - - - name: "target_memory_count" - type: "string" - description: "Maximum number of aggregate memories to return" - required: true - - - name: "max_resource_links" - type: "string" - description: "Maximum representative links per aggregate memory" - required: true - - - name: "user_id" - type: "string" - description: "The single OpenViking user that owns all input memories" - required: true - - - name: "aggregate_memories_json" - type: "string" - description: "Existing aggregate memories as JSON" - required: true - - - name: "resource_items_json" - type: "string" - description: "New single resource-link memories as JSON" - required: true - -template: | - You compact resource-addition memories. - - ## Objective - Rewrite the existing aggregate memories plus the new single resource memories into at most {{ target_memory_count }} readable aggregate memories. - For a broad collection with clear subtopics, prefer 2-3 aggregate memories instead of one oversized memory. - - ## Target Output Language - All titles and content MUST be written in {{ output_language }}. - - ## User Context - All input memories belong to one OpenViking user: {{ user_id }}. 
- Names mentioned in reasons/content are people, subjects, authors, photographers, or described actors from that user's resource descriptions. They are NOT OpenViking users unless the input explicitly says so. - - ## Existing Aggregate Memories - {{ aggregate_memories_json }} - - ## New Single Resource Memories - {{ resource_items_json }} - - ## Rules - - Preserve the user's intent, judgments, people, places, times, and topics when they are present. - - Do not say "multiple users", "N users", or "each user" unless the input explicitly refers to multiple OpenViking users. Prefer phrasing like "用户上传/保存了一组..." for this user's resources. - - Do not enumerate every resource. Group resources by meaningful topics such as person, place, time, scene, project, document, or user judgment. - - When many resources share a broad category but have distinct subthemes, split them into 2-3 concise topic memories. Example: city landmarks, natural landscapes, cultural/historic sites. - - If the resources are weakly related, create a concise "mixed resources" style topic instead of forcing false similarity. - - Each aggregate memory should be useful to a human reader and should mention approximate coverage, themes, and representative examples. - - Include at most {{ max_resource_links }} markdown resource links per aggregate memory. - - Use only resource URIs provided in the input, and only as markdown links. - - Do not include raw JSON, raw metadata, or raw URI lists in visible content. - - Do not claim that you inspected resource file contents. - - Keep titles short and file-name friendly: at most 16 Chinese characters or 8 English words. Omit upload dates from titles unless needed to distinguish topics. - - Keep each content under 300 Chinese characters or 180 English words. 
- - ## Output JSON - Return ONLY a JSON object: - { - "memories": [ - { - "title": "short topic title, safe as a file name", - "content": "markdown content with a few representative links", - "resource_uris": ["viking://resources/..."], - "item_count": 12 - } - ] - } - -llm_config: - temperature: 0.0 diff --git a/openviking/prompts/templates/processing/resource_linking.yaml b/openviking/prompts/templates/processing/resource_linking.yaml deleted file mode 100644 index 06f9b51651..0000000000 --- a/openviking/prompts/templates/processing/resource_linking.yaml +++ /dev/null @@ -1,79 +0,0 @@ -metadata: - id: "processing.resource_linking" - name: "Resource Linking" - description: "Create or update user memories from an add-resource reason" - version: "1.0.0" - language: "en" - category: "processing" - -variables: - - name: "output_language" - type: "string" - description: "Target language for memory content" - required: true - - - name: "resource_uri" - type: "string" - description: "Viking resource URI" - required: true - - - name: "reason" - type: "string" - description: "User-provided add-resource reason" - required: true - - - name: "source_name" - type: "string" - description: "Original resource display name" - required: false - default: "" - - - name: "added_at" - type: "string" - description: "Resource addition time" - required: false - default: "" - - - name: "resource_abstract" - type: "string" - description: "Optional directory abstract near the resource" - required: false - default: "" - -template: | - You are a memory extraction agent for a resource-addition event. - - ## Objective - Create or update user memories using ONLY the user-provided reason, the resource URI, the resource addition time, and the optional resource abstract. - - ## Target Output Language - All memory content MUST be written in {{ output_language }}. 
- - ## Resource Addition - Resource URI: {{ resource_uri }} - Source name: {{ source_name or "N/A" }} - Added at: {{ added_at or "N/A" }} - Resource abstract: {{ resource_abstract or "N/A" }} - Reason: {{ reason }} - - ## Rules - - Do NOT read, summarize, OCR, infer, or expand the resource file content. - - Treat the reason as the primary semantic evidence. - - Treat Added at as the time when the user added/uploaded this resource, not the time when the resource content was created, captured, signed, or happened. - - Treat Resource abstract as weak supporting context for a short resource descriptor only. Use it only when the descriptor adds non-redundant readability beyond the reason or existing memory; do not infer user facts from it unless the reason supports them. - - Choose the most appropriate existing user memory type from the output schema, such as profile, entities, events, or preferences. - - If the reason is not worth remembering, output no memory changes. - - Create/edit visible memory as durable natural sentences preserving user intent/judgment; rewrite terse resource labels. - - Example: reason "page 3 total should be 42" -> "User said page 3 total should be 42", not "stored report resource". - - When Source name alone is opaque, prefer one short neutral descriptor from Resource abstract if it helps readers understand what the resource is. Keep it under 12 Chinese characters or under 8 English words. - - Example: reason "这个项目是张三的项目", source "Apollo", abstract "Apollo is a config service..." -> "用户添加了 Apollo(配置服务项目),并说明它是张三的项目。" - - Omit the resource description if it merely repeats the subject, media type, or facts already stated by the reason or visible memory. - - Use the added date in visible memory only when it improves human readability or the memory would otherwise be temporally ambiguous. - - When editing existing memory, merge with it; never replace it with only the newest resource or enumerate/count resources. 
- - Use the Resource URI only as resource identity metadata. - - Do NOT include raw resource URIs, file paths, generated links, or raw Resource abstract text in visible memory content. - - Do NOT claim that you inspected the resource itself. - - Return only memory operations that are grounded in the reason. - -llm_config: - temperature: 0.0 diff --git a/openviking/service/core.py b/openviking/service/core.py index 9e7ed58892..9174d65a87 100644 --- a/openviking/service/core.py +++ b/openviking/service/core.py @@ -423,7 +423,7 @@ async def initialize(self) -> None: self._resource_memory_link_service.set_dependencies( vikingdb=self._vikingdb_manager, viking_fs=self._viking_fs, - queue_manager=self._queue_manager, + session_service=self._session_service, ) self._relation_service.set_viking_fs(self._viking_fs) self._pack_service.set_dependencies( diff --git a/openviking/service/resource_link_memory_compactor.py b/openviking/service/resource_link_memory_compactor.py deleted file mode 100644 index 95c683b54f..0000000000 --- a/openviking/service/resource_link_memory_compactor.py +++ /dev/null @@ -1,662 +0,0 @@ -# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
-# SPDX-License-Identifier: AGPL-3.0 -"""Compact resource-linked memories created from add-resource reasons.""" - -from __future__ import annotations - -import hashlib -import json -import re -import threading -from dataclasses import dataclass -from datetime import datetime, timezone -from typing import Any, Dict, List, Optional, Sequence - -from pydantic import BaseModel, Field - -from openviking.core.namespace import canonical_user_root, context_type_for_uri -from openviking.models.vlm.base import VLMResponse -from openviking.prompts.manager import render_prompt -from openviking.server.identity import RequestContext, Role -from openviking.session.memory.dataclass import MemoryFile -from openviking.session.memory.memory_updater import MemoryUpdater -from openviking.session.memory.utils import resolve_output_language -from openviking.session.memory.utils.json_parser import parse_json_with_stability -from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils -from openviking.storage import VikingDBManager -from openviking.storage.queuefs.named_queue import DequeueHandlerBase -from openviking.storage.queuefs.queue_manager import QueueManager -from openviking.storage.transaction.lock_context import LockContext -from openviking.storage.transaction.lock_manager import get_lock_manager -from openviking.storage.viking_fs import VikingFS, get_viking_fs -from openviking_cli.exceptions import NotFoundError -from openviking_cli.session.user_id import UserIdentifier -from openviking_cli.utils import get_logger -from openviking_cli.utils.config import get_openviking_config - -logger = get_logger(__name__) - -RESOURCE_LINK_MEMORY_TYPE = "resource_link_memories" -RESOURCE_LINK_COMPACTION_QUEUE = "ResourceLinkCompaction" -RESOURCE_LINK_MANAGED_FIELD = "resource_link_managed" -RESOURCE_LINK_SOURCE_FIELD = "resource_link_source" -RESOURCE_LINK_CREATED_AT_FIELD = "resource_link_created_at" -RESOURCE_LINK_STATE_FIELD = "resource_link_state" 
-RESOURCE_LINK_COMPACTION_REF_SOURCE = "resource_link.compaction" - -_COMPACTION_THRESHOLD = 10 -_COMPACTION_BATCH_SIZE = 50 -_TARGET_AGGREGATE_MEMORY_COUNT = 3 -_MAX_REPRESENTATIVE_LINKS = 5 -_MAX_REASON_CHARS = 180 -_MAX_AGGREGATE_CONTENT_CHARS = 1200 -_MAX_VISIBLE_CONTENT_CHARS = 360 -_MAX_TITLE_CHARS = 24 -_RESOURCE_URI_RE = re.compile(r"viking://resources/[^\s<>\]\)\"']+") -_UNSAFE_FILENAME_CHARS_RE = re.compile(r"[\\/:\*\?\"<>\|\n\r\t]+") -_TITLE_DATE_PREFIX_RE = re.compile( - r"^(?:\d{4}年\d{1,2}月\d{1,2}日|\d{4}[-/]\d{1,2}[-/]\d{1,2})" -) -_TITLE_USER_ACTION_PREFIX_RE = re.compile(r"^(?:用户|该用户|当前用户)(?:上传|保存|添加|导入)的?") - - -@dataclass -class _CompactionCandidate: - uri: str - raw_hash: str - content: str - extra_fields: Dict[str, Any] - resource_refs: List[Dict[str, Any]] - resource_uris: List[str] - - @property - def item_count(self) -> int: - state = self.extra_fields.get(RESOURCE_LINK_STATE_FIELD) - if isinstance(state, dict): - try: - return max(1, int(state.get("item_count") or 0)) - except (TypeError, ValueError): - pass - return 1 - - -class _CompactedMemory(BaseModel): - title: str = "" - content: str = "" - resource_uris: List[str] = Field(default_factory=list) - item_count: int = 0 - - -class _CompactionResponse(BaseModel): - memories: List[_CompactedMemory] = Field(default_factory=list) - - -class ResourceLinkMemoryCompactor(DequeueHandlerBase): - """Batch compact system-managed resource-link memories.""" - - def __init__( - self, - *, - vikingdb: Optional[VikingDBManager] = None, - viking_fs: Optional[VikingFS] = None, - queue_manager: Optional[QueueManager] = None, - ): - self._vikingdb = vikingdb - self._viking_fs = viking_fs - self._queue_manager = queue_manager - self._coalesce_versions: Dict[str, int] = {} - self._coalesce_lock = threading.Lock() - if queue_manager: - self._ensure_queue() - - def set_dependencies( - self, - *, - vikingdb: Optional[VikingDBManager], - viking_fs: VikingFS, - queue_manager: Optional[QueueManager], - ) -> None: - 
self._vikingdb = vikingdb - self._viking_fs = viking_fs - self._queue_manager = queue_manager - if queue_manager: - self._ensure_queue() - - def _get_viking_fs(self) -> VikingFS: - return self._viking_fs or get_viking_fs() - - def _ensure_queue(self) -> None: - if not self._queue_manager: - return - self._queue_manager.get_queue( - RESOURCE_LINK_COMPACTION_QUEUE, - dequeue_handler=self, - allow_create=True, - ) - - async def mark_managed_memories( - self, - *, - ctx: RequestContext, - memory_uris: Sequence[str], - created_at: str, - ) -> List[str]: - """Mark newly-created add-resource memories as eligible for later compaction.""" - marked: List[str] = [] - viking_fs = self._get_viking_fs() - for memory_uri in dict.fromkeys(memory_uris): - if context_type_for_uri(memory_uri) != "memory": - continue - try: - raw = await viking_fs.read_file(memory_uri, ctx=ctx) - mf = MemoryFileUtils.read(raw, uri=memory_uri) - except Exception as exc: - logger.warning("Failed to mark resource-linked memory %s: %s", memory_uri, exc) - continue - if mf.memory_type == RESOURCE_LINK_MEMORY_TYPE: - continue - mf.extra_fields[RESOURCE_LINK_MANAGED_FIELD] = True - mf.extra_fields[RESOURCE_LINK_SOURCE_FIELD] = "add_resource.reason" - mf.extra_fields.setdefault(RESOURCE_LINK_CREATED_AT_FIELD, created_at) - await viking_fs.write_file(memory_uri, MemoryFileUtils.write(mf), ctx=ctx) - marked.append(memory_uri) - return marked - - async def enqueue_check(self, *, ctx: RequestContext) -> Optional[str]: - """Enqueue a coalesced compaction check for the current user memory root.""" - if not self._queue_manager: - return None - self._ensure_queue() - key = self._coalesce_key(ctx) - with self._coalesce_lock: - version = self._coalesce_versions.get(key, 0) + 1 - self._coalesce_versions[key] = version - queue = self._queue_manager.get_queue( - RESOURCE_LINK_COMPACTION_QUEUE, - dequeue_handler=self, - allow_create=True, - ) - return await queue.enqueue( - { - "account_id": ctx.account_id, - "user_id": 
ctx.user.user_id, - "role": str(ctx.role.value if hasattr(ctx.role, "value") else ctx.role), - "coalesce_key": key, - "coalesce_version": version, - } - ) - - async def on_dequeue(self, data: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: - data = self._unwrap_queue_data(data) - if not data: - self.report_success() - return data - - key = str(data.get("coalesce_key") or "") - version = self._safe_int(data.get("coalesce_version"), default=0) - if self._is_stale(key, version): - self.report_success() - return data - - try: - ctx = RequestContext( - user=UserIdentifier(str(data["account_id"]), str(data["user_id"])), - role=Role(str(data.get("role") or Role.USER.value)), - ) - result = await self.compact_if_needed(ctx=ctx) - data["result"] = result - self.report_success() - except Exception as exc: - logger.warning("Resource-link compaction failed: %s", exc, exc_info=True) - data["error"] = str(exc) - self.report_error(str(exc), data) - return data - - async def compact_if_needed(self, *, ctx: RequestContext) -> Dict[str, Any]: - aggregate_dir_uri = self.aggregate_dir_uri(ctx) - try: - lock_manager = get_lock_manager() - lock_path = self._lock_path(aggregate_dir_uri, ctx) - except Exception: - lock_manager = None - lock_path = "" - - if lock_manager and lock_path: - async with LockContext(lock_manager, [lock_path], lock_mode="exact"): - return await self._compact_once(ctx=ctx, aggregate_dir_uri=aggregate_dir_uri) - return await self._compact_once(ctx=ctx, aggregate_dir_uri=aggregate_dir_uri) - - @staticmethod - def aggregate_dir_uri(ctx: RequestContext) -> str: - return f"{canonical_user_root(ctx)}/memories/{RESOURCE_LINK_MEMORY_TYPE}" - - async def _compact_once( - self, - *, - ctx: RequestContext, - aggregate_dir_uri: str, - ) -> Dict[str, Any]: - singles, aggregates = await self._scan_candidates(ctx=ctx, aggregate_dir_uri=aggregate_dir_uri) - total_memory_count = len(singles) + len(aggregates) - should_compact = ( - len(singles) >= _COMPACTION_THRESHOLD - or 
(len(singles) > 0 and total_memory_count >= _COMPACTION_THRESHOLD) - or len(aggregates) > _TARGET_AGGREGATE_MEMORY_COUNT - ) - if not should_compact: - return { - "status": "skipped", - "reason": "below_threshold", - "single_count": len(singles), - "aggregate_count": len(aggregates), - "total_memory_count": total_memory_count, - } - - batch = singles[:_COMPACTION_BATCH_SIZE] - response = await self._generate_compaction( - ctx=ctx, - batch=batch, - aggregates=aggregates, - ) - if not response.memories: - return { - "status": "skipped", - "reason": "empty_compaction_output", - "single_count": len(singles), - "aggregate_count": len(aggregates), - "total_memory_count": total_memory_count, - } - - written_uris = await self._write_aggregate_memories( - ctx=ctx, - aggregate_dir_uri=aggregate_dir_uri, - response=response, - input_item_count=sum(item.item_count for item in batch + aggregates), - ) - if not written_uris: - return {"status": "skipped", "reason": "no_aggregate_written"} - - target_uris = set(written_uris) - deleted_uris = await self._delete_compacted_inputs( - ctx=ctx, - candidates=[*batch, *aggregates], - keep_uris=target_uris, - ) - await self._refresh_deleted_parent_overviews(ctx=ctx, deleted_uris=deleted_uris) - - remaining_singles = max(0, len(singles) - len(batch)) - if remaining_singles >= _COMPACTION_THRESHOLD: - await self.enqueue_check(ctx=ctx) - - return { - "status": "success", - "written_uris": written_uris, - "deleted_uris": deleted_uris, - "remaining_single_count": remaining_singles, - } - - async def _scan_candidates( - self, - *, - ctx: RequestContext, - aggregate_dir_uri: str, - ) -> tuple[List[_CompactionCandidate], List[_CompactionCandidate]]: - viking_fs = self._get_viking_fs() - memory_root = f"{canonical_user_root(ctx)}/memories" - try: - entries = await viking_fs.tree( - memory_root, - ctx=ctx, - node_limit=1000000, - level_limit=None, - ) - except Exception as exc: - logger.warning("Failed to scan memories for resource-link compaction: 
%s", exc) - return [], [] - - singles: List[_CompactionCandidate] = [] - aggregates: List[_CompactionCandidate] = [] - for entry in entries: - uri = str(entry.get("uri") or "") - if not uri or bool(entry.get("isDir") or entry.get("is_dir")): - continue - if not uri.endswith(".md") or self._is_hidden_memory_file(uri): - continue - try: - raw = await viking_fs.read_file(uri, ctx=ctx) - mf = MemoryFileUtils.read(raw, uri=uri) - except Exception: - continue - - refs = self._coerce_resource_refs(mf.extra_fields.get("resource_refs")) - resource_uris = self._resource_uris_from_memory(mf, refs) - candidate = _CompactionCandidate( - uri=uri, - raw_hash=self._hash_raw(raw), - content=mf.content or "", - extra_fields=dict(mf.extra_fields or {}), - resource_refs=refs, - resource_uris=resource_uris, - ) - if uri.startswith(aggregate_dir_uri.rstrip("/") + "/"): - aggregates.append(candidate) - elif mf.extra_fields.get(RESOURCE_LINK_MANAGED_FIELD) is True and refs: - singles.append(candidate) - - singles.sort(key=self._candidate_sort_key) - aggregates.sort(key=lambda item: item.uri) - return singles, aggregates - - async def _generate_compaction( - self, - *, - ctx: RequestContext, - batch: Sequence[_CompactionCandidate], - aggregates: Sequence[_CompactionCandidate], - ) -> _CompactionResponse: - prompt = render_prompt( - "processing.resource_link_memory_compaction", - { - "output_language": self._output_language(batch, aggregates), - "target_memory_count": str(_TARGET_AGGREGATE_MEMORY_COUNT), - "max_resource_links": str(_MAX_REPRESENTATIVE_LINKS), - "user_id": ctx.user.user_id, - "aggregate_memories_json": json.dumps( - [self._aggregate_prompt_item(item) for item in aggregates], - ensure_ascii=False, - indent=2, - ), - "resource_items_json": json.dumps( - [self._single_prompt_item(item) for item in batch], - ensure_ascii=False, - indent=2, - ), - }, - ) - content = await self._call_model(prompt) - parsed, error = parse_json_with_stability(content, _CompactionResponse) - if error 
or not parsed: - logger.warning("Failed to parse resource-link compaction output: %s", error) - return _CompactionResponse() - return parsed - - async def _call_model(self, prompt: str) -> str: - config = get_openviking_config() - vlm = config.vlm.get_vlm_instance() - response = await vlm.get_completion_async(prompt) - if isinstance(response, VLMResponse): - return response.content or "" - return str(response or "") - - async def _write_aggregate_memories( - self, - *, - ctx: RequestContext, - aggregate_dir_uri: str, - response: _CompactionResponse, - input_item_count: int, - ) -> List[str]: - now = datetime.now(timezone.utc).isoformat() - used_names: set[str] = set() - written_uris: List[str] = [] - viking_fs = self._get_viking_fs() - for index, memory in enumerate(response.memories[:_TARGET_AGGREGATE_MEMORY_COUNT], start=1): - title = self._clean_title(memory.title, index) - filename = self._unique_filename(title, used_names) - uri = f"{aggregate_dir_uri.rstrip('/')}/{filename}" - resource_uris = self._valid_resource_uris( - [*memory.resource_uris, *self._extract_resource_uris(memory.content)] - )[:_MAX_REPRESENTATIVE_LINKS] - content = self._truncate_text(memory.content.strip(), _MAX_VISIBLE_CONTENT_CHARS) - if not content: - continue - item_count = self._memory_item_count(memory, input_item_count) - mf = MemoryFile( - uri=uri, - content=content, - memory_type=RESOURCE_LINK_MEMORY_TYPE, - extra_fields={ - "topic": title, - RESOURCE_LINK_STATE_FIELD: { - "item_count": item_count, - "updated_at": now, - "representative_resources": [ - {"resource_uri": resource_uri} for resource_uri in resource_uris - ], - }, - "resource_refs": [ - { - "resource_uri": resource_uri, - "source": RESOURCE_LINK_COMPACTION_REF_SOURCE, - "created_at": now, - "match_text": title, - } - for resource_uri in resource_uris - ], - }, - ) - await viking_fs.write_file(uri, MemoryFileUtils.write(mf), ctx=ctx) - await MemoryUpdater.refresh_file_embedding( - viking_fs=viking_fs, - 
vikingdb=self._vikingdb, - uri=uri, - memory_type=RESOURCE_LINK_MEMORY_TYPE, - ctx=ctx, - ) - written_uris.append(uri) - return written_uris - - async def _delete_compacted_inputs( - self, - *, - ctx: RequestContext, - candidates: Sequence[_CompactionCandidate], - keep_uris: set[str], - ) -> List[str]: - deleted: List[str] = [] - viking_fs = self._get_viking_fs() - for candidate in candidates: - if candidate.uri in keep_uris: - continue - try: - current_raw = await viking_fs.read_file(candidate.uri, ctx=ctx) - except (NotFoundError, FileNotFoundError, KeyError): - continue - except Exception: - continue - if self._hash_raw(current_raw) != candidate.raw_hash: - logger.info("Skip deleting changed resource-link memory: %s", candidate.uri) - continue - try: - await viking_fs.rm(candidate.uri, recursive=False, ctx=ctx) - deleted.append(candidate.uri) - except (NotFoundError, FileNotFoundError, KeyError): - continue - return deleted - - async def _refresh_deleted_parent_overviews( - self, - *, - ctx: RequestContext, - deleted_uris: Sequence[str], - ) -> None: - viking_fs = self._get_viking_fs() - parent_dirs = { - uri.rsplit("/", 1)[0] - for uri in deleted_uris - if context_type_for_uri(uri) == "memory" and "/" in uri - } - for directory_uri in sorted(parent_dirs): - await MemoryUpdater.refresh_schema_overview( - viking_fs=viking_fs, - directory_uri=directory_uri, - ctx=ctx, - ) - - def _lock_path(self, aggregate_dir_uri: str, ctx: RequestContext) -> str: - viking_fs = self._get_viking_fs() - if hasattr(viking_fs, "_uri_to_path"): - return viking_fs._uri_to_path(aggregate_dir_uri, ctx=ctx) - return aggregate_dir_uri - - def _is_stale(self, key: str, version: int) -> bool: - if not key or version <= 0: - return False - with self._coalesce_lock: - return version < self._coalesce_versions.get(key, 0) - - @staticmethod - def _coalesce_key(ctx: RequestContext) -> str: - return f"{ctx.account_id}|{ctx.user.user_id}|{RESOURCE_LINK_MEMORY_TYPE}" - - @staticmethod - def 
_unwrap_queue_data(data: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]: - if not isinstance(data, dict): - return data - payload = data.get("data") - if isinstance(payload, str): - try: - parsed = json.loads(payload) - if isinstance(parsed, dict): - return parsed - except Exception: - return data - if isinstance(payload, dict): - return payload - return data - - @staticmethod - def _hash_raw(raw: Any) -> str: - if isinstance(raw, bytes): - data = raw - else: - data = str(raw or "").encode("utf-8") - return hashlib.sha256(data).hexdigest() - - @staticmethod - def _is_hidden_memory_file(uri: str) -> bool: - leaf = uri.rsplit("/", 1)[-1] - return leaf.startswith(".") or uri.endswith("/.overview.md") or uri.endswith("/.abstract.md") - - @staticmethod - def _coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: - if isinstance(value, list): - return [dict(item) for item in value if isinstance(item, dict)] - if isinstance(value, dict): - return [dict(value)] - return [] - - @classmethod - def _resource_uris_from_memory( - cls, - mf: MemoryFile, - refs: Sequence[Dict[str, Any]], - ) -> List[str]: - uris = [str(ref.get("resource_uri") or "") for ref in refs] - uris.extend(cls._extract_resource_uris(mf.content or "")) - return cls._valid_resource_uris(uris) - - @staticmethod - def _valid_resource_uris(values: Sequence[str]) -> List[str]: - result: List[str] = [] - for value in values: - uri = str(value or "").strip().rstrip(".,;:!?,。;:!?") - if uri.startswith("viking://resources/") and uri not in result: - result.append(uri) - return result - - @staticmethod - def _extract_resource_uris(text: str) -> List[str]: - return _RESOURCE_URI_RE.findall(text or "") - - @classmethod - def _candidate_sort_key(cls, item: _CompactionCandidate) -> str: - values = [item.extra_fields.get(RESOURCE_LINK_CREATED_AT_FIELD)] - values.extend(ref.get("created_at") for ref in item.resource_refs) - for value in values: - if value: - return str(value) - return item.uri - - @classmethod - 
def _single_prompt_item(cls, item: _CompactionCandidate) -> Dict[str, Any]: - primary_ref = item.resource_refs[0] if item.resource_refs else {} - return { - "memory_uri": item.uri, - "content": cls._truncate_text(item.content, _MAX_REASON_CHARS * 2), - "reason": cls._truncate_text(str(primary_ref.get("reason") or ""), _MAX_REASON_CHARS), - "created_at": primary_ref.get("created_at") - or item.extra_fields.get(RESOURCE_LINK_CREATED_AT_FIELD) - or "", - "resource_uris": item.resource_uris[:_MAX_REPRESENTATIVE_LINKS], - "item_count": 1, - } - - @classmethod - def _aggregate_prompt_item(cls, item: _CompactionCandidate) -> Dict[str, Any]: - state = item.extra_fields.get(RESOURCE_LINK_STATE_FIELD) - return { - "memory_uri": item.uri, - "topic": item.extra_fields.get("topic") or item.uri.rsplit("/", 1)[-1].removesuffix(".md"), - "content": cls._truncate_text(item.content, _MAX_AGGREGATE_CONTENT_CHARS), - "resource_uris": item.resource_uris[:_MAX_REPRESENTATIVE_LINKS], - "item_count": item.item_count, - "state": state if isinstance(state, dict) else {}, - } - - @staticmethod - def _output_language( - batch: Sequence[_CompactionCandidate], - aggregates: Sequence[_CompactionCandidate], - ) -> str: - sample = "\n".join( - item.content for item in [*list(batch[:5]), *list(aggregates[:3])] if item.content - ) - return resolve_output_language(sample) - - @classmethod - def _clean_title(cls, title: str, index: int) -> str: - title = " ".join(str(title or "").split()) - title = _UNSAFE_FILENAME_CHARS_RE.sub("-", title).strip(" .-_") - title = _TITLE_DATE_PREFIX_RE.sub("", title).strip(" .-_") - title = _TITLE_USER_ACTION_PREFIX_RE.sub("", title).strip(" .-_") - if not title: - title = f"资源集合{index}" - return title[:_MAX_TITLE_CHARS].strip(" .-_") or f"资源集合{index}" - - @staticmethod - def _unique_filename(title: str, used_names: set[str]) -> str: - base = title.removesuffix(".md") - filename = f"{base}.md" - suffix = 2 - while filename in used_names: - filename = 
f"{base}-{suffix}.md" - suffix += 1 - used_names.add(filename) - return filename - - @staticmethod - def _memory_item_count(memory: _CompactedMemory, fallback_total: int) -> int: - try: - count = int(memory.item_count) - except (TypeError, ValueError): - count = 0 - if count > 0: - return count - if fallback_total > 0 and len(memory.resource_uris) <= 1: - return fallback_total - return max(1, len(memory.resource_uris)) - - @staticmethod - def _truncate_text(text: Any, max_chars: int) -> str: - value = " ".join(str(text or "").split()) - if len(value) <= max_chars: - return value - return value[: max_chars - 3].rstrip() + "..." - - @staticmethod - def _safe_int(value: Any, *, default: int) -> int: - try: - return int(value) - except (TypeError, ValueError): - return default diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py index bf73afeff8..542986eca0 100644 --- a/openviking/service/resource_memory_link_service.py +++ b/openviking/service/resource_memory_link_service.py @@ -8,17 +8,17 @@ from __future__ import annotations -import re +import asyncio from dataclasses import dataclass from datetime import datetime, timezone -from typing import Any, Dict, Iterable, List, Optional, Sequence +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence +from uuid import uuid4 from openviking.core.namespace import canonical_user_root, context_type_for_uri from openviking.message import Message from openviking.message.part import TextPart from openviking.prompts.manager import render_prompt from openviking.server.identity import RequestContext -from openviking.service.resource_link_memory_compactor import ResourceLinkMemoryCompactor from openviking.session.memory.dataclass import MemoryFile, ResolvedOperations from openviking.session.memory.extract_loop import ExtractLoop from openviking.session.memory.memory_isolation_handler import MemoryIsolationHandler @@ -28,18 +28,20 @@ MemoryUpdateResult, ) 
from openviking.session.memory.session_extract_context_provider import SessionExtractContextProvider -from openviking.session.memory.utils.link_renderer import LinkRenderer from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils from openviking.storage import VikingDBManager -from openviking.storage.queuefs.queue_manager import QueueManager from openviking.storage.viking_fs import VikingFS, get_viking_fs from openviking_cli.exceptions import NotFoundError from openviking_cli.utils import VikingURI, get_logger from openviking_cli.utils.config import get_openviking_config +if TYPE_CHECKING: + from openviking.service.session_service import SessionService + logger = get_logger(__name__) -RESOURCE_REF_SOURCE = "add_resource.reason" +_RESOURCE_REASON_MEMORY_TYPES = ["entities", "events", "preferences"] +_RESOURCE_REASON_COMMIT_TIMEOUT_SECONDS = 1800.0 _RESOURCE_ABSTRACT_MAX_CHARS = 200 _ABSTRACT_NOT_READY_MARKERS = ( "[.abstract.md is not ready]", @@ -54,97 +56,6 @@ class _MemoryRefMatch: resource_ref: Dict[str, Any] -class _ResourceLinkingProvider(SessionExtractContextProvider): - """Provider for creating/updating memory from an add-resource reason.""" - - def __init__( - self, - *, - resource_uri: str, - reason: str, - source_name: Optional[str], - added_at: Optional[str] = None, - resource_abstract: Optional[str] = None, - **kwargs: Any, - ): - self.resource_uri = resource_uri - self.reason = reason - self.source_name = source_name or "" - self.added_at = added_at or "" - self.resource_abstract = resource_abstract or "" - messages = [ - Message( - id="resource-linking", - role="user", - created_at=self.added_at or None, - parts=[ - TextPart( - text=( - "Resource URI: " - f"{resource_uri}\nReason: {reason}\nSource name: {self.source_name}\n" - f"Added at: {self.added_at or 'N/A'}\n" - f"Resource abstract: {self.resource_abstract or 'N/A'}" - ) - ) - ], - ) - ] - super().__init__(messages=messages, **kwargs) - - def instruction(self) -> str: - 
return render_prompt( - "processing.resource_linking", - { - "output_language": self.get_output_language(), - "resource_uri": self.resource_uri, - "reason": self.reason, - "source_name": self.source_name, - "added_at": self.added_at, - "resource_abstract": self.resource_abstract, - }, - ) - - def _build_conversation_message(self) -> Dict[str, Any]: - return { - "role": "user", - "content": ( - "## Resource Addition\n" - f"Resource URI: {self.resource_uri}\n" - f"Reason: {self.reason}\n" - f"Source name: {self.source_name or 'N/A'}\n" - f"Added at: {self.added_at or 'N/A'}\n" - f"Resource abstract: {self.resource_abstract or 'N/A'}\n\n" - "Analyze only this resource addition record and output all memory " - "write/edit/delete operations in a single JSON response." - ), - } - - def _build_prefetch_search_query(self) -> str: - return "\n".join( - part for part in [self.reason, self.source_name, self.resource_abstract] if part - ).strip() - - def get_conversation_text(self) -> str: - return "\n".join( - part - for part in [ - self.reason, - self.resource_uri, - self.source_name, - self.added_at, - self.resource_abstract, - ] - if part - ).strip() - - def _detect_language(self) -> str: - from openviking.session.memory.utils import resolve_output_language - - return resolve_output_language( - "\n".join(part for part in [self.reason, self.source_name] if part).strip() - ) - - class _ResourceUnlinkingProvider(SessionExtractContextProvider): """Provider for removing resource-derived content from one memory file.""" @@ -224,31 +135,24 @@ def __init__( *, vikingdb: Optional[VikingDBManager] = None, viking_fs: Optional[VikingFS] = None, - queue_manager: Optional[QueueManager] = None, - compactor: Optional[ResourceLinkMemoryCompactor] = None, + session_service: Optional["SessionService"] = None, ): self._vikingdb = vikingdb self._viking_fs = viking_fs - self._compactor = compactor or ResourceLinkMemoryCompactor( - vikingdb=vikingdb, - viking_fs=viking_fs, - 
queue_manager=queue_manager, - ) + self._session_service = session_service + self._background_tasks: set[asyncio.Task] = set() def set_dependencies( self, *, vikingdb: Optional[VikingDBManager], viking_fs: VikingFS, - queue_manager: Optional[QueueManager] = None, + session_service: Optional["SessionService"] = None, ) -> None: self._vikingdb = vikingdb self._viking_fs = viking_fs - self._compactor.set_dependencies( - vikingdb=vikingdb, - viking_fs=viking_fs, - queue_manager=queue_manager, - ) + if session_service is not None: + self._session_service = session_service def _get_viking_fs(self) -> VikingFS: return self._viking_fs or get_viking_fs() @@ -260,65 +164,177 @@ async def on_resource_added( resource_uri: str, reason: str, source_name: Optional[str] = None, + timeout: Optional[float] = None, ) -> Dict[str, Any]: - """Extract user memory from an add-resource reason.""" + """Bridge add-resource reason extraction through normal session commit.""" reason = (reason or "").strip() if not reason: return {"status": "skipped", "reason": "empty_reason"} if not resource_uri: return {"status": "skipped", "reason": "empty_resource_uri"} + if not self._session_service: + return {"status": "skipped", "reason": "session_service_unavailable"} added_at = datetime.now(timezone.utc).isoformat() resource_abstract = await self._read_resource_directory_abstract(resource_uri, ctx) - provider = _ResourceLinkingProvider( - resource_uri=resource_uri, - reason=reason, - source_name=source_name, - added_at=added_at, - resource_abstract=resource_abstract, - ctx=ctx, - viking_fs=self._get_viking_fs(), - ) - operations, extract_context, isolation_handler = await self._run_extract_loop( - provider=provider, - ctx=ctx, - ) - if not operations or not ( - operations.upsert_operations or operations.delete_file_contents or operations.errors - ): - return {"status": "no_changes", "memory_uris": []} + session_id = f"resource_reason_{uuid4().hex}" + commit_result: Dict[str, Any] = {} + task_result: 
Optional[Dict[str, Any]] = None + delete_session_now = True + try: + session = await self._session_service.create( + ctx, + session_id=session_id, + memory_policy={ + "self": {"enabled": True}, + "peer": {"enabled": False}, + "memory_types": _RESOURCE_REASON_MEMORY_TYPES, + }, + ) + session.add_messages( + [ + { + "role": "user", + "parts": [ + TextPart( + text=self._build_resource_addition_message( + resource_uri=resource_uri, + reason=reason, + source_name=source_name, + added_at=added_at, + resource_abstract=resource_abstract, + ) + ) + ], + "created_at": added_at, + } + ] + ) + commit_result = await self._session_service.commit_async( + session_id, + ctx, + keep_recent_count=0, + ) + task_id = commit_result.get("task_id") + if task_id: + try: + task_result = await self._wait_for_commit_task( + task_id=str(task_id), + ctx=ctx, + timeout=timeout, + ) + except asyncio.TimeoutError: + delete_session_now = False + self._schedule_session_delete_after_task( + session_id=session_id, + task_id=str(task_id), + ctx=ctx, + ) + raise + return { + "status": "success", + "session_id": session_id, + "commit_task_id": task_id, + "archive_uri": commit_result.get("archive_uri"), + "commit_task": task_result, + } + finally: + if delete_session_now: + await self._delete_temporary_session(session_id, ctx) - result = await self._apply_memory_operations( - provider=provider, - operations=operations, - ctx=ctx, - extract_context=extract_context, - isolation_handler=isolation_handler, + @staticmethod + def _build_resource_addition_message( + *, + resource_uri: str, + reason: str, + source_name: Optional[str], + added_at: str, + resource_abstract: str, + ) -> str: + return ( + "## Resource Addition\n" + f"Resource URI: {resource_uri}\n" + f"Source name: {source_name or 'N/A'}\n" + f"Added at: {added_at or 'N/A'}\n" + f"Resource abstract: {resource_abstract or 'N/A'}\n" + f"User reason: {reason}" ) - changed_uris = list(dict.fromkeys(result.written_uris + result.edited_uris)) - await 
self._append_resource_refs( - memory_uris=changed_uris, - resource_uri=resource_uri, - reason=reason, - ctx=ctx, - created_at=added_at, + + def _schedule_session_delete_after_task( + self, + *, + session_id: str, + task_id: str, + ctx: RequestContext, + ) -> None: + task = asyncio.create_task( + self._delete_session_after_task(session_id=session_id, task_id=task_id, ctx=ctx) ) - managed_uris = await self._compactor.mark_managed_memories( - ctx=ctx, - memory_uris=result.written_uris, - created_at=added_at, + self._background_tasks.add(task) + task.add_done_callback(self._background_tasks.discard) + + async def _delete_session_after_task( + self, + *, + session_id: str, + task_id: str, + ctx: RequestContext, + ) -> None: + try: + await self._wait_for_commit_task( + task_id=task_id, + ctx=ctx, + timeout=_RESOURCE_REASON_COMMIT_TIMEOUT_SECONDS, + ) + except Exception as exc: + logger.warning( + "Skipped temporary resource reason session cleanup after task %s: %s", + task_id, + exc, + ) + return + await self._delete_temporary_session(session_id, ctx) + + async def _delete_temporary_session(self, session_id: str, ctx: RequestContext) -> None: + if not self._session_service: + return + try: + await self._session_service.delete(session_id, ctx) + except NotFoundError: + pass + except Exception as exc: + logger.warning("Failed to delete temporary resource reason session: %s", exc) + + async def _wait_for_commit_task( + self, + *, + task_id: str, + ctx: RequestContext, + timeout: Optional[float], + ) -> Dict[str, Any]: + from openviking.service.task_tracker import get_task_tracker + + async def _poll() -> Dict[str, Any]: + tracker = get_task_tracker() + while True: + task = await tracker.get( + task_id, + account_id=ctx.account_id, + user_id=ctx.user.user_id, + ) + if task is None: + raise RuntimeError(f"session commit task not found: {task_id}") + status = task.status.value if hasattr(task.status, "value") else str(task.status) + if status == "completed": + return 
task.to_dict() + if status == "failed": + raise RuntimeError(task.error or f"session commit task failed: {task_id}") + await asyncio.sleep(0.1) + + return await asyncio.wait_for( + _poll(), + timeout=timeout or _RESOURCE_REASON_COMMIT_TIMEOUT_SECONDS, ) - compaction_msg_id = await self._compactor.enqueue_check(ctx=ctx) - missing_uri = await self._memory_files_missing_resource_uri(changed_uris, resource_uri, ctx) - return { - "status": "success" if not result.errors else "partial_success", - "memory_uris": changed_uris, - "managed_memory_uris": managed_uris, - "compaction_msg_id": compaction_msg_id, - "deleted_memory_uris": result.deleted_uris, - "errors": [f"{uri}: {exc}" for uri, exc in result.errors], - "missing_resource_uri_uris": missing_uri, - } async def before_resource_delete( self, @@ -514,53 +530,6 @@ def _mark_result_deleted(result: MemoryUpdateResult, uri: str) -> None: if uri not in result.deleted_uris: result.add_deleted(uri) - async def _append_resource_refs( - self, - *, - memory_uris: Sequence[str], - resource_uri: str, - reason: str, - ctx: RequestContext, - created_at: Optional[str] = None, - ) -> None: - viking_fs = self._get_viking_fs() - created_at = created_at or datetime.now(timezone.utc).isoformat() - for memory_uri in dict.fromkeys(memory_uris): - if context_type_for_uri(memory_uri) != "memory": - continue - try: - raw = await viking_fs.read_file(memory_uri, ctx=ctx) - mf = MemoryFileUtils.read(raw, uri=memory_uri) - except Exception as exc: - logger.warning("Failed to read memory for resource ref append: %s", exc) - continue - existing_refs = self._coerce_resource_refs(mf.extra_fields.get("resource_refs")) - allow_sentence_fallback = not any( - not self._resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive=False) - for ref in existing_refs - ) - match_text = self._pick_match_text(mf, reason) - mf.content, rendered_match_text = self._link_resource_in_content( - mf.content, - resource_uri=resource_uri, - 
match_text=match_text, - allow_sentence_fallback=allow_sentence_fallback, - ) - match_text = rendered_match_text or match_text - ref = { - "resource_uri": resource_uri, - "reason": reason, - "source": RESOURCE_REF_SOURCE, - "created_at": created_at, - } - if match_text: - ref["match_text"] = match_text - mf.extra_fields["resource_refs"] = self._merge_resource_refs( - existing_refs, - ref, - ) - await viking_fs.write_file(memory_uri, MemoryFileUtils.write(mf), ctx=ctx) - async def _remove_resource_refs( self, memory_uri: str, @@ -666,23 +635,6 @@ def _clean_resource_abstract(abstract: Any) -> str: return text[: _RESOURCE_ABSTRACT_MAX_CHARS - 3].rstrip() + "..." return text - async def _memory_files_missing_resource_uri( - self, - memory_uris: Iterable[str], - resource_uri: str, - ctx: RequestContext, - ) -> List[str]: - missing: List[str] = [] - viking_fs = self._get_viking_fs() - for uri in memory_uris: - try: - raw = await viking_fs.read_file(uri, ctx=ctx) - except Exception: - continue - if resource_uri not in raw: - missing.append(uri) - return missing - async def _assert_resource_unlinked( self, memory_uri: str, @@ -700,19 +652,6 @@ async def _assert_resource_unlinked( if self._resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive=True): raise RuntimeError(f"memory still contains resource ref: {memory_uri}") - @staticmethod - def _merge_resource_refs(existing: Any, new_ref: Dict[str, Any]) -> List[Dict[str, Any]]: - refs = ResourceMemoryLinkService._coerce_resource_refs(existing) - for ref in refs: - if ( - ref.get("resource_uri") == new_ref.get("resource_uri") - and ref.get("source") == new_ref.get("source") - ): - ref.update({k: v for k, v in new_ref.items() if v}) - return refs - refs.append(new_ref) - return refs - @staticmethod def _coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: if isinstance(value, list): @@ -744,162 +683,6 @@ def _resource_ref_matches( return True return recursive and normalized_ref.startswith(normalized_target 
+ "/") - @classmethod - def _pick_match_text(cls, memory_file: MemoryFile, reason: str) -> Optional[str]: - content = memory_file.content or "" - candidates = [] - name = str(memory_file.extra_fields.get("name") or "").strip() - if name: - candidates.append(name) - reason_anchor = cls._extract_anchor_from_reason(reason) - if reason_anchor: - candidates.append(reason_anchor) - for token in (reason or "").replace(",", " ").replace(",", " ").split(): - stripped = token.strip() - if stripped: - candidates.append(stripped) - candidates.extend(["资源", "resource", "Resource"]) - for candidate in dict.fromkeys(candidates): - if candidate and LinkRenderer._find_match_span(content, candidate): - return candidate - return None - - @staticmethod - def _extract_anchor_from_reason(reason: str) -> Optional[str]: - text = (reason or "").strip() - if not text: - return None - patterns = [ - r"^(?:这是一张|这是|这张|这个|用户上传了(?:一张|一个)?|上传了(?:一张|一个)?|新增了|添加了)?\s*(?P[^,,。!?\n]{1,60}?)(?:的)?(?:照片|图片|截图|图像|文件|资源|文档|身份证|证件|资料)\s*$", - r"(?P[^,,。!?\n]{1,60}?)(?:的)?(?:照片|图片|截图|图像|文件|资源|文档|身份证|证件|资料)", - ] - for pattern in patterns: - match = re.search(pattern, text) - if not match: - continue - anchor = (match.group("anchor") or "").strip() - anchor = re.sub(r"^(?:关于|有关|一张|一个)\s*", "", anchor).strip() - anchor = re.sub(r"(?:的|之)$", "", anchor).strip() - if anchor: - return anchor - return None - - @classmethod - def _link_resource_in_content( - cls, - content: str, - *, - resource_uri: str, - match_text: Optional[str], - allow_sentence_fallback: bool, - ) -> tuple[str, Optional[str]]: - content = content or "" - if not content or not resource_uri: - return content, None - if cls._content_links_resource(content, resource_uri): - return content, match_text - - if match_text: - span = cls._find_unlinked_match_span(content, match_text) - if span: - linked = cls._replace_span_with_link(content, span, resource_uri) - linked = cls._remove_redundant_visible_resource_uri(linked, resource_uri) - return 
linked, content[span[0] : span[1]] - - if allow_sentence_fallback: - span = cls._first_sentence_span(content, resource_uri) - if span: - linked = cls._replace_span_with_link(content, span, resource_uri) - linked = cls._remove_redundant_visible_resource_uri(linked, resource_uri) - return linked, content[span[0] : span[1]].strip() - - return content, match_text - - @staticmethod - def _content_links_resource(content: str, resource_uri: str) -> bool: - return bool( - re.search( - r"\[[^\]]+\]\(" + re.escape(resource_uri) + r"\)", - content or "", - ) - ) - - @classmethod - def _find_unlinked_match_span( - cls, - content: str, - match_text: str, - ) -> Optional[tuple[int, int]]: - span = LinkRenderer._find_match_span(content, match_text) - if not span: - return None - if cls._span_inside_markdown_link(content, span): - return None - return span - - @staticmethod - def _span_inside_markdown_link(content: str, span: tuple[int, int]) -> bool: - start, end = span - for match in re.finditer(r"\[[^\]]+\]\([^)]+\)", content or ""): - if start >= match.start() and end <= match.end(): - return True - return False - - @staticmethod - def _replace_span_with_link( - content: str, - span: tuple[int, int], - resource_uri: str, - ) -> str: - start, end = span - anchor = content[start:end] - return f"{content[:start]}[{anchor}]({resource_uri}){content[end:]}" - - @staticmethod - def _first_sentence_span(content: str, resource_uri: str) -> Optional[tuple[int, int]]: - match = re.search(r"\S", content or "") - if not match: - return None - start = match.start() - line_end = content.find("\n", start) - if line_end == -1: - line_end = len(content) - line = content[start:line_end] - punctuation = re.search(r"[。!?.!?]", line) - end = start + punctuation.end() if punctuation else line_end - sentence = content[start:end].strip() - if not sentence or resource_uri in sentence or len(sentence) > 160: - return None - if ResourceMemoryLinkService._span_inside_markdown_link(content, (start, end)): 
- return None - return start, end - - @staticmethod - def _remove_redundant_visible_resource_uri(content: str, resource_uri: str) -> str: - if not ResourceMemoryLinkService._content_links_resource(content, resource_uri): - return content - uri = ResourceMemoryLinkService._visible_resource_uri_pattern(resource_uri) - label = r"(?:resource\s+URI|资源\s*URI|资源地址|资源链接)" - patterns = [ - re.compile(rf"(?im)^[ \t]*(?:[-*]\s*)?{label}\s*[::]\s*{uri}[ \t]*(?:\r?\n|$)"), - re.compile(rf"\s*[,,;;]?\s*{label}\s*[::]\s*{uri}"), - re.compile(rf"\s*[::]\s*{uri}"), - ] - cleaned = content - for pattern in patterns: - cleaned = pattern.sub("", cleaned) - cleaned = re.sub(r"[ \t]+([。!?.!?,,;;])", r"\1", cleaned) - cleaned = re.sub(r"\n{3,}", "\n\n", cleaned) - return cleaned.strip() - - @staticmethod - def _visible_resource_uri_pattern(resource_uri: str) -> str: - markdown_escaped_chars = set(r"\`*_{}[]()#+-.!|") - return "".join( - rf"\\?{re.escape(char)}" if char in markdown_escaped_chars else re.escape(char) - for char in resource_uri - ) - @staticmethod def _infer_memory_type(memory_uri: str, memory_file: MemoryFile) -> str: memory_type = ( diff --git a/openviking/service/resource_service.py b/openviking/service/resource_service.py index 3f8646fa53..71365ba2dd 100644 --- a/openviking/service/resource_service.py +++ b/openviking/service/resource_service.py @@ -630,12 +630,14 @@ async def add_resource( logger.warning( f"[ResourceService] Failed to cancel watch task for {to}: {e}" ) - await self._link_resource_reason_memory( - result=result, - ctx=ctx, - reason=reason, - source_name=kwargs.get("source_name"), - ) + if wait: + await self._link_resource_reason_memory( + result=result, + ctx=ctx, + reason=reason, + source_name=kwargs.get("source_name"), + timeout=timeout, + ) if not wait: from openviking.service.task_tracker import get_task_tracker @@ -651,25 +653,33 @@ async def add_resource( if telemetry_id: monitor_started = True background = asyncio.create_task( - 
self._monitor_queue_processing( + self._monitor_resource_queue_then_link_memory( task.task_id, telemetry_id, - ctx.account_id, - ctx.user.user_id, + ctx, + root_uri=root_uri, + reason=reason, + source_name=kwargs.get("source_name"), + timeout=timeout, ) ) self._background_tasks.add(background) background.add_done_callback(self._background_tasks.discard) else: - await task_tracker.start( - task.task_id, account_id=ctx.account_id, user_id=ctx.user.user_id - ) - await task_tracker.complete( - task.task_id, - {"root_uri": root_uri}, - account_id=ctx.account_id, - user_id=ctx.user.user_id, + monitor_started = True + background = asyncio.create_task( + self._monitor_resource_queue_then_link_memory( + task.task_id, + None, + ctx, + root_uri=root_uri, + reason=reason, + source_name=kwargs.get("source_name"), + timeout=timeout, + ) ) + self._background_tasks.add(background) + background.add_done_callback(self._background_tasks.discard) return result except Exception as exc: telemetry.set_error( @@ -694,6 +704,7 @@ async def _link_resource_reason_memory( ctx: RequestContext, reason: str, source_name: Optional[str], + timeout: Optional[float] = None, ) -> None: if not self._resource_memory_link_service: return @@ -708,12 +719,73 @@ async def _link_resource_reason_memory( resource_uri=root_uri, reason=reason, source_name=source_name, + timeout=timeout, ) result["memory_linking"] = link_result except Exception as exc: logger.warning("[ResourceService] Failed to link resource reason memory: %s", exc) result.setdefault("warnings", []).append(f"Memory linking failed: {exc}") + async def _monitor_resource_queue_then_link_memory( + self, + task_id: str, + telemetry_id: Optional[str], + ctx: RequestContext, + *, + root_uri: str, + reason: str, + source_name: Optional[str], + timeout: Optional[float], + ) -> None: + from openviking.service.task_tracker import get_task_tracker + + task_tracker = get_task_tracker() + request_wait_tracker = get_request_wait_tracker() + await 
task_tracker.start(task_id, account_id=ctx.account_id, user_id=ctx.user.user_id) + try: + if telemetry_id: + await request_wait_tracker.wait_for_request(telemetry_id) + status = request_wait_tracker.build_queue_status(telemetry_id) + else: + status = build_queue_status_payload( + await get_queue_manager().wait_complete(timeout=timeout) + ) + errors = sum(int(group.get("error_count", 0) or 0) for group in status.values()) + if errors: + await task_tracker.fail( + task_id, + f"queue processing failed: {status}", + account_id=ctx.account_id, + user_id=ctx.user.user_id, + ) + return + + result: Dict[str, Any] = {"root_uri": root_uri, "queue_status": status} + await self._link_resource_reason_memory( + result=result, + ctx=ctx, + reason=reason, + source_name=source_name, + timeout=timeout, + ) + await task_tracker.complete( + task_id, + result, + account_id=ctx.account_id, + user_id=ctx.user.user_id, + ) + except Exception as exc: + await task_tracker.fail( + task_id, + str(exc), + account_id=ctx.account_id, + user_id=ctx.user.user_id, + ) + finally: + if telemetry_id: + request_wait_tracker.cleanup(telemetry_id) + unregister_wait_telemetry(telemetry_id) + async def _monitor_queue_processing( self, task_id: str, diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 36d06da444..3ba3aa1c7c 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -29,6 +29,10 @@ from openviking.session.memory.merge_op import MergeOpFactory from openviking.session.memory.page_id_map import PageIdMap from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking.session.memory.utils.resource_refs import ( + RESOURCE_REF_SOURCE_SESSION_COMMIT, + sync_memory_resource_refs, +) from openviking.session.memory.utils.template_utils import TemplateUtils from openviking.session.memory.utils.uri import render_template from openviking.storage.viking_fs import 
get_viking_fs @@ -654,6 +658,8 @@ async def apply_operations( tracer.error(f"Failed to delete memory {file_content.uri}", e) result.add_error(file_content.uri, e) + await self._sync_resource_refs_for_result(result, ctx) + # Vectorize written and edited memories uri_memory_type_map = {} for op in operations.upsert_operations: @@ -697,6 +703,29 @@ async def apply_operations( return result + async def _sync_resource_refs_for_result( + self, + result: MemoryUpdateResult, + ctx: RequestContext, + ) -> None: + """Synchronize resource refs for memory files touched by session extraction.""" + viking_fs = self._get_viking_fs() + deleted_uris = set(result.deleted_uris) + for uri in dict.fromkeys(result.written_uris + result.edited_uris): + if uri in deleted_uris or uri.endswith("/.overview.md") or uri.endswith("/.abstract.md"): + continue + try: + raw = await viking_fs.read_file(uri, ctx=ctx) + mf = MemoryFileUtils.read(raw, uri=uri) + changed = sync_memory_resource_refs( + mf, + source=RESOURCE_REF_SOURCE_SESSION_COMMIT, + ) + if changed: + await viking_fs.write_file(uri, MemoryFileUtils.write(mf), ctx=ctx) + except Exception as exc: + logger.warning("Failed to sync resource refs for %s: %s", uri, exc) + async def _apply_upsert( self, resolved_op: ResolvedOperation, ctx: RequestContext, extract_context: Any = None ): diff --git a/openviking/session/memory/session_extract_context_provider.py b/openviking/session/memory/session_extract_context_provider.py index 3afed47fee..1a0a5159d5 100644 --- a/openviking/session/memory/session_extract_context_provider.py +++ b/openviking/session/memory/session_extract_context_provider.py @@ -8,6 +8,7 @@ import json import os +import re from typing import TYPE_CHECKING, Any, Dict, List, Optional from openviking.message.part import ToolPart @@ -44,6 +45,9 @@ _PREFETCH_SEARCH_TEXT_PART_MAX_CHARS = 1000 _PREFETCH_SEARCH_ASSISTANT_TEXT_PART_MAX_CHARS = 500 _PREFETCH_SEARCH_TOOL_FIELD_MAX_CHARS = 500 +_RESOURCE_REASON_LANGUAGE_RE = re.compile( + 
r"(?im)^\s*(?:User reason|用户说明|用户原因|用户理由)[::]\s*(.+?)\s*$" +) class SessionExtractContextProvider(ExtractContextProvider): @@ -107,20 +111,39 @@ def get_extract_context(self) -> "ExtractContext": def _detect_language(self) -> str: """检测输出语言""" from openviking.message.part import TextPart - from openviking.session.memory.utils import resolve_output_language + from openviking.session.memory.utils import ( + resolve_output_language, + strip_language_detection_noise, + ) user_text_parts = [] all_text_parts = [] for message in self.messages or []: for part in getattr(message, "parts", []): if isinstance(part, TextPart) and part.text: - all_text_parts.append(part.text) + text = self._language_signal_text( + part.text, + strip_language_detection_noise=strip_language_detection_noise, + ) + all_text_parts.append(text) if getattr(message, "role", "") == "user": - user_text_parts.append(part.text) + user_text_parts.append(text) text_parts = user_text_parts or all_text_parts return resolve_output_language("\n".join(text_parts)) + @staticmethod + def _language_signal_text(text: str, *, strip_language_detection_noise) -> str: + """Keep user-authored language signal and drop machine-oriented URI noise.""" + reason_lines = [ + match.group(1).strip() + for match in _RESOURCE_REASON_LANGUAGE_RE.finditer(text or "") + if match.group(1).strip() + ] + if reason_lines: + return "\n".join(reason_lines) + return strip_language_detection_noise(text) + def get_output_language(self) -> str: return self._output_language @@ -144,6 +167,12 @@ def instruction(self) -> str: ## URI Handling The system automatically generates URIs based on memory_type and fields. Just provide correct memory_type and fields. +## Resource URI Handling +- If the conversation contains a `viking://resources/...` URI and the user says a durable fact, judgment, preference, or event about it, extract that memory into the appropriate normal memory type such as entities, events, or preferences. 
+- Preserve resource references as markdown links in visible memory content when useful. Example: user said "用户保存了越前龙马照片 viking://resources/images/ryoma" -> write "用户保存了[越前龙马照片](viking://resources/images/ryoma)". +- If the user already wrote `[text](viking://resources/...)`, keep the same resource link intent. +- Do NOT claim you inspected, summarized, OCRed, or opened the resource file unless the conversation explicitly provides that fact. + ## Self and Peer Memory When a memory item describes the current user, omit peer_id. When a memory item describes a peer, set peer_id to one of the peer_id values allowed by diff --git a/openviking/session/memory/utils/__init__.py b/openviking/session/memory/utils/__init__.py index 85fe0f557b..2cbb32792e 100644 --- a/openviking/session/memory/utils/__init__.py +++ b/openviking/session/memory/utils/__init__.py @@ -20,6 +20,7 @@ resolve_output_language, resolve_output_language_from_conversation, resolve_with_override, + strip_language_detection_noise, ) from openviking.session.memory.utils.line_numbers import ( add_line_numbers, @@ -47,6 +48,7 @@ "resolve_output_language", "resolve_output_language_from_conversation", "resolve_with_override", + "strip_language_detection_noise", "add_line_numbers", "every_line_has_line_numbers", "extract_start_line_number", diff --git a/openviking/session/memory/utils/language.py b/openviking/session/memory/utils/language.py index 2fa7d3f04f..16ad145134 100644 --- a/openviking/session/memory/utils/language.py +++ b/openviking/session/memory/utils/language.py @@ -21,6 +21,7 @@ _STRONG_DOMINANT_MIN_CHARS = 10 _STRONG_DOMINANT_RATIO = 0.95 _PRIMARY_LANGUAGES = {"zh-CN", "en"} +_URI_LANGUAGE_NOISE_RE = re.compile(r"\b(?:viking|https?)://[^\s<>\]\)\"']+") _LATIN_STOPWORDS = { "en": set( @@ -52,12 +53,30 @@ } _LATIN_HINT_LANGUAGES = {"it", "fr", "es", "de", "pt"} -_LOCALE_LANGUAGE_PREFIXES = dict( - zh="zh-CN", ja="ja", ko="ko", ru="ru", ar="ar", - it="it", fr="fr", es="es", de="de", pt="pt", en="en", - 
chinese="zh-CN", japanese="ja", korean="ko", russian="ru", arabic="ar", - italian="it", french="fr", spanish="es", german="de", portuguese="pt", english="en", -) +_LOCALE_LANGUAGE_PREFIXES = { + "zh": "zh-CN", + "ja": "ja", + "ko": "ko", + "ru": "ru", + "ar": "ar", + "it": "it", + "fr": "fr", + "es": "es", + "de": "de", + "pt": "pt", + "en": "en", + "chinese": "zh-CN", + "japanese": "ja", + "korean": "ko", + "russian": "ru", + "arabic": "ar", + "italian": "it", + "french": "fr", + "spanish": "es", + "german": "de", + "portuguese": "pt", + "english": "en", +} # Use Timezone as a weak fallback signal. _TIMEZONE_LANGUAGE_GROUPS = { @@ -231,6 +250,7 @@ def _detect_latin_language(text: str, fallback_language: str) -> str: def _detect_language_from_text(user_text: str, fallback_language: str) -> str: """Internal shared helper to detect dominant language from text.""" fallback = (fallback_language or "en").strip() or "en" + user_text = strip_language_detection_noise(user_text) if not user_text: return fallback @@ -291,6 +311,11 @@ def resolve_with_override(config, detect: Callable[[], str]) -> str: return detect() +def strip_language_detection_noise(text: str) -> str: + """Remove URI-like machine tokens that should not affect output language.""" + return _URI_LANGUAGE_NOISE_RE.sub(" ", text or "") + + def resolve_output_language(text: str, config=None) -> str: """Resolve output language from text, honoring config override before detection.""" fallback = _resolve_system_fallback_language("en") diff --git a/openviking/session/memory/utils/resource_refs.py b/openviking/session/memory/utils/resource_refs.py new file mode 100644 index 0000000000..222b4db45d --- /dev/null +++ b/openviking/session/memory/utils/resource_refs.py @@ -0,0 +1,229 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
+# SPDX-License-Identifier: AGPL-3.0 +"""Helpers for resource references embedded in memory content.""" + +from __future__ import annotations + +import re +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional, Sequence + +from openviking.session.memory.dataclass import MemoryFile + +RESOURCE_REF_SOURCE_CONTENT_WRITE = "content.write" +RESOURCE_REF_SOURCE_SESSION_COMMIT = "session.commit" + +_MARKDOWN_RESOURCE_LINK_RE = re.compile(r"\[([^\]\n]+)\]\((viking://resources/[^)\s]+)\)") +_RESOURCE_URI_RE = re.compile(r"viking://resources/[^\s<>\]\)\"']+") +_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL) +_INLINE_CODE_RE = re.compile(r"`[^`\n]+`") +_TRAILING_URI_PUNCTUATION = ".,;:!?,。;:!?" +_SENTENCE_BOUNDARIES = "。!?.!?\n" +_MAX_LINKIFIED_SENTENCE_CHARS = 160 + + +def sync_memory_resource_refs( + mf: MemoryFile, + *, + source: str, + reason: Optional[str] = None, + created_at: Optional[str] = None, +) -> bool: + """Link visible resource URIs and keep MEMORY_FIELDS.resource_refs in sync.""" + before_content = mf.content + before_refs = _coerce_resource_refs(mf.extra_fields.get("resource_refs")) + + code_spans = _protected_code_spans(mf.content) + markdown_refs, markdown_spans = _extract_markdown_resource_refs( + mf.content, + code_spans, + ) + mf.content, bare_refs = _linkify_bare_resource_uris( + mf.content, + code_spans + markdown_spans, + ) + _merge_resource_refs( + mf, + markdown_refs + bare_refs, + source=source, + reason=reason, + created_at=created_at, + ) + + after_refs = _coerce_resource_refs(mf.extra_fields.get("resource_refs")) + return before_content != mf.content or before_refs != after_refs + + +def coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: + return _coerce_resource_refs(value) + + +def _protected_code_spans(content: str) -> List[tuple[int, int]]: + spans = [(match.start(), match.end()) for match in _CODE_BLOCK_RE.finditer(content or "")] + spans.extend((match.start(), match.end()) for match in 
_INLINE_CODE_RE.finditer(content or "")) + return spans + + +def _extract_markdown_resource_refs( + content: str, + protected_spans: Sequence[tuple[int, int]], +) -> tuple[List[Dict[str, Any]], List[tuple[int, int]]]: + refs: List[Dict[str, Any]] = [] + link_spans: List[tuple[int, int]] = [] + for match in _MARKDOWN_RESOURCE_LINK_RE.finditer(content or ""): + if _overlaps_spans(match.start(), match.end(), protected_spans): + continue + label = match.group(1).strip() + resource_uri = _trim_resource_uri(match.group(2).strip()) + link_spans.append((match.start(), match.end())) + refs.append( + { + "resource_uri": resource_uri, + "match_text": label or None, + } + ) + return refs, link_spans + + +def _linkify_bare_resource_uris( + content: str, + protected_spans: Sequence[tuple[int, int]], +) -> tuple[str, List[Dict[str, Any]]]: + refs: List[Dict[str, Any]] = [] + updated = content or "" + covered_start = len(updated) + 1 + + matches = list(_RESOURCE_URI_RE.finditer(updated)) + for match in reversed(matches): + resource_uri = _trim_resource_uri(match.group(0)) + if not resource_uri: + continue + start = match.start() + end = start + len(resource_uri) + if _overlaps_spans(start, end, protected_spans): + continue + + refs.append({"resource_uri": resource_uri}) + sentence_span = _previous_sentence_span(updated, start) + if not sentence_span: + continue + sentence_start, sentence_end = sentence_span + if end > covered_start: + continue + anchor = updated[sentence_start:sentence_end] + if "viking://resources/" in anchor or "](" in anchor: + continue + refs[-1]["match_text"] = anchor + replacement = f"[{anchor}]({resource_uri})" + updated = updated[:sentence_start] + replacement + updated[end:] + covered_start = sentence_start + + refs.reverse() + return updated, refs + + +def _previous_sentence_span(content: str, uri_start: int) -> Optional[tuple[int, int]]: + sentence_end = uri_start + while sentence_end > 0 and content[sentence_end - 1].isspace(): + sentence_end -= 1 + if 
sentence_end <= 0: + return None + + boundary_search_end = sentence_end + if content[sentence_end - 1] in _SENTENCE_BOUNDARIES: + boundary_search_end = sentence_end - 1 + sentence_start = 0 + for idx in range(boundary_search_end - 1, -1, -1): + if content[idx] in _SENTENCE_BOUNDARIES: + sentence_start = idx + 1 + break + while sentence_start < sentence_end and content[sentence_start].isspace(): + sentence_start += 1 + + anchor = content[sentence_start:sentence_end] + if not anchor or len(anchor) > _MAX_LINKIFIED_SENTENCE_CHARS: + return None + return sentence_start, sentence_end + + +def _merge_resource_refs( + mf: MemoryFile, + refs: Sequence[Dict[str, Any]], + *, + source: str, + reason: Optional[str], + created_at: Optional[str], +) -> None: + visible_refs: Dict[str, Dict[str, Any]] = {} + for ref in refs: + resource_uri = ref.get("resource_uri") + if not isinstance(resource_uri, str) or not resource_uri: + continue + existing = visible_refs.setdefault(resource_uri, {"resource_uri": resource_uri}) + match_text = ref.get("match_text") + if match_text and not existing.get("match_text"): + existing["match_text"] = match_text + + existing_refs = _coerce_resource_refs(mf.extra_fields.get("resource_refs")) + merged: List[Dict[str, Any]] = [] + seen_resource_uris: set[str] = set() + ref_created_at = created_at or datetime.now(timezone.utc).isoformat() + + for existing_ref in existing_refs: + resource_uri = existing_ref.get("resource_uri") + if not isinstance(resource_uri, str) or not resource_uri: + merged.append(existing_ref) + continue + + visible_ref = visible_refs.get(resource_uri) + if existing_ref.get("source") == source and visible_ref is None: + continue + + if visible_ref and existing_ref.get("source") == source: + if visible_ref.get("match_text"): + existing_ref["match_text"] = visible_ref["match_text"] + existing_ref.setdefault("created_at", ref_created_at) + if reason: + existing_ref.setdefault("reason", reason) + + merged.append(existing_ref) + 
seen_resource_uris.add(resource_uri) + + for resource_uri, visible_ref in visible_refs.items(): + if resource_uri in seen_resource_uris: + continue + ref = { + "resource_uri": resource_uri, + "source": source, + "created_at": ref_created_at, + } + if reason: + ref["reason"] = reason + if visible_ref.get("match_text"): + ref["match_text"] = visible_ref["match_text"] + merged.append(ref) + + if merged: + mf.extra_fields["resource_refs"] = merged + else: + mf.extra_fields.pop("resource_refs", None) + + +def _coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: + if isinstance(value, list): + return [dict(item) for item in value if isinstance(item, dict)] + if isinstance(value, dict): + return [dict(value)] + return [] + + +def _trim_resource_uri(resource_uri: str) -> str: + return (resource_uri or "").rstrip(_TRAILING_URI_PUNCTUATION) + + +def _overlaps_spans( + start: int, + end: int, + protected_spans: Sequence[tuple[int, int]], +) -> bool: + return any(start < span_end and end > span_start for span_start, span_end in protected_spans) diff --git a/openviking/storage/content_write.py b/openviking/storage/content_write.py index 80070ebed1..64e745c506 100644 --- a/openviking/storage/content_write.py +++ b/openviking/storage/content_write.py @@ -5,15 +5,17 @@ from __future__ import annotations import os -import re -from datetime import datetime, timezone -from typing import Any, Dict, List, Optional, Sequence +from typing import Any, Dict, Optional from openviking.core.namespace import NamespaceShapeError, canonicalize_uri, context_type_for_uri from openviking.resource.watch_storage import is_watch_task_control_uri from openviking.server.identity import RequestContext from openviking.session.memory.memory_updater import MemoryUpdater from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking.session.memory.utils.resource_refs import ( + RESOURCE_REF_SOURCE_CONTENT_WRITE, + sync_memory_resource_refs, +) from 
openviking.storage.queuefs import SemanticMsg, get_queue_manager from openviking.storage.queuefs.semantic_msg import build_semantic_coalesce_key from openviking.storage.transaction import get_lock_manager @@ -36,14 +38,6 @@ _CREATE_ALLOWED_EXTENSIONS = frozenset( {".md", ".txt", ".json", ".yaml", ".yml", ".toml", ".py", ".js", ".ts"} ) -_CONTENT_WRITE_RESOURCE_REF_SOURCE = "content.write" -_MARKDOWN_RESOURCE_LINK_RE = re.compile(r"\[([^\]\n]+)\]\((viking://resources/[^)\s]+)\)") -_RESOURCE_URI_RE = re.compile(r"viking://resources/[^\s<>\]\)\"']+") -_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL) -_INLINE_CODE_RE = re.compile(r"`[^`\n]+`") -_TRAILING_URI_PUNCTUATION = ".,;:!?,。;:!?" -_SENTENCE_BOUNDARIES = "。!?.!?\n" -_MAX_LINKIFIED_SENTENCE_CHARS = 160 class ContentWriteCoordinator: @@ -374,7 +368,7 @@ async def _write_in_place( mf.content = mf.content + content else: mf = MemoryFileUtils.read(content, uri=uri) - self._sync_memory_resource_refs(mf) + sync_memory_resource_refs(mf, source=RESOURCE_REF_SOURCE_CONTENT_WRITE) await self._viking_fs.write_file(uri, MemoryFileUtils.write(mf), ctx=ctx) return @@ -387,179 +381,6 @@ async def _write_in_place( return await self._viking_fs.write_file(uri, content, ctx=ctx) - def _sync_memory_resource_refs(self, mf) -> None: - code_spans = self._protected_code_spans(mf.content) - markdown_refs, markdown_spans = self._extract_markdown_resource_refs( - mf.content, - code_spans, - ) - mf.content, bare_refs = self._linkify_bare_resource_uris( - mf.content, - code_spans + markdown_spans, - ) - self._merge_content_write_resource_refs(mf, markdown_refs + bare_refs) - - @staticmethod - def _protected_code_spans(content: str) -> List[tuple[int, int]]: - spans = [(match.start(), match.end()) for match in _CODE_BLOCK_RE.finditer(content or "")] - spans.extend((match.start(), match.end()) for match in _INLINE_CODE_RE.finditer(content or "")) - return spans - - @classmethod - def _extract_markdown_resource_refs( - cls, - content: str, - 
protected_spans: Sequence[tuple[int, int]], - ) -> tuple[List[Dict[str, Any]], List[tuple[int, int]]]: - refs: List[Dict[str, Any]] = [] - link_spans: List[tuple[int, int]] = [] - for match in _MARKDOWN_RESOURCE_LINK_RE.finditer(content or ""): - if cls._overlaps_spans(match.start(), match.end(), protected_spans): - continue - label = match.group(1).strip() - resource_uri = cls._trim_resource_uri(match.group(2).strip()) - link_spans.append((match.start(), match.end())) - refs.append( - { - "resource_uri": resource_uri, - "match_text": label or None, - } - ) - return refs, link_spans - - @classmethod - def _linkify_bare_resource_uris( - cls, - content: str, - protected_spans: Sequence[tuple[int, int]], - ) -> tuple[str, List[Dict[str, Any]]]: - refs: List[Dict[str, Any]] = [] - updated = content or "" - covered_start = len(updated) + 1 - - matches = list(_RESOURCE_URI_RE.finditer(updated)) - for match in reversed(matches): - resource_uri = cls._trim_resource_uri(match.group(0)) - if not resource_uri: - continue - start = match.start() - end = start + len(resource_uri) - if cls._overlaps_spans(start, end, protected_spans): - continue - - refs.append({"resource_uri": resource_uri}) - sentence_span = cls._previous_sentence_span(updated, start) - if not sentence_span: - continue - sentence_start, sentence_end = sentence_span - if end > covered_start: - continue - anchor = updated[sentence_start:sentence_end] - if "viking://resources/" in anchor or "](" in anchor: - continue - refs[-1]["match_text"] = anchor - replacement = f"[{anchor}]({resource_uri})" - updated = updated[:sentence_start] + replacement + updated[end:] - covered_start = sentence_start - - refs.reverse() - return updated, refs - - @staticmethod - def _previous_sentence_span(content: str, uri_start: int) -> Optional[tuple[int, int]]: - sentence_end = uri_start - while sentence_end > 0 and content[sentence_end - 1].isspace(): - sentence_end -= 1 - if sentence_end <= 0: - return None - - boundary_search_end 
= sentence_end - if content[sentence_end - 1] in _SENTENCE_BOUNDARIES: - boundary_search_end = sentence_end - 1 - sentence_start = 0 - for idx in range(boundary_search_end - 1, -1, -1): - if content[idx] in _SENTENCE_BOUNDARIES: - sentence_start = idx + 1 - break - while sentence_start < sentence_end and content[sentence_start].isspace(): - sentence_start += 1 - - anchor = content[sentence_start:sentence_end] - if not anchor or len(anchor) > _MAX_LINKIFIED_SENTENCE_CHARS: - return None - return sentence_start, sentence_end - - @staticmethod - def _trim_resource_uri(resource_uri: str) -> str: - return (resource_uri or "").rstrip(_TRAILING_URI_PUNCTUATION) - - @staticmethod - def _overlaps_spans( - start: int, - end: int, - protected_spans: Sequence[tuple[int, int]], - ) -> bool: - return any(start < span_end and end > span_start for span_start, span_end in protected_spans) - - @classmethod - def _merge_content_write_resource_refs(cls, mf, refs: Sequence[Dict[str, Any]]) -> None: - visible_refs: Dict[str, Dict[str, Any]] = {} - for ref in refs: - resource_uri = ref.get("resource_uri") - if not isinstance(resource_uri, str) or not resource_uri: - continue - existing = visible_refs.setdefault(resource_uri, {"resource_uri": resource_uri}) - match_text = ref.get("match_text") - if match_text and not existing.get("match_text"): - existing["match_text"] = match_text - - existing_refs = cls._coerce_resource_refs(mf.extra_fields.get("resource_refs")) - merged: List[Dict[str, Any]] = [] - seen_resource_uris: set[str] = set() - created_at = datetime.now(timezone.utc).isoformat() - for existing_ref in existing_refs: - resource_uri = existing_ref.get("resource_uri") - if not isinstance(resource_uri, str) or not resource_uri: - merged.append(existing_ref) - continue - visible_ref = visible_refs.get(resource_uri) - if ( - existing_ref.get("source") == _CONTENT_WRITE_RESOURCE_REF_SOURCE - and visible_ref is None - ): - continue - if visible_ref and existing_ref.get("source") == 
_CONTENT_WRITE_RESOURCE_REF_SOURCE: - if visible_ref.get("match_text"): - existing_ref["match_text"] = visible_ref["match_text"] - existing_ref.setdefault("created_at", created_at) - merged.append(existing_ref) - seen_resource_uris.add(resource_uri) - - for resource_uri, visible_ref in visible_refs.items(): - if resource_uri in seen_resource_uris: - continue - ref = { - "resource_uri": resource_uri, - "source": _CONTENT_WRITE_RESOURCE_REF_SOURCE, - "created_at": created_at, - } - if visible_ref.get("match_text"): - ref["match_text"] = visible_ref["match_text"] - merged.append(ref) - - if merged: - mf.extra_fields["resource_refs"] = merged - else: - mf.extra_fields.pop("resource_refs", None) - - @staticmethod - def _coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: - if isinstance(value, list): - return [dict(item) for item in value if isinstance(item, dict)] - if isinstance(value, dict): - return [dict(value)] - return [] - async def _enqueue_semantic_refresh( self, *, diff --git a/tests/service/test_resource_link_memory_compactor.py b/tests/service/test_resource_link_memory_compactor.py deleted file mode 100644 index a4ffa5d81d..0000000000 --- a/tests/service/test_resource_link_memory_compactor.py +++ /dev/null @@ -1,290 +0,0 @@ -# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
-# SPDX-License-Identifier: AGPL-3.0 -"""Tests for resource-link memory compaction.""" - -from unittest.mock import AsyncMock - -import pytest - -from openviking.server.identity import RequestContext, Role -from openviking.service.resource_link_memory_compactor import ( - RESOURCE_LINK_MANAGED_FIELD, - RESOURCE_LINK_MEMORY_TYPE, - ResourceLinkMemoryCompactor, - _CompactedMemory, - _CompactionResponse, -) -from openviking.session.memory.dataclass import MemoryFile -from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils -from openviking_cli.session.user_id import UserIdentifier - - -class _FakeVikingFS: - def __init__(self, store): - self.store = store - self.rm_calls = [] - - async def read_file(self, uri, ctx=None): - return self.store[uri] - - async def write_file(self, uri, content, ctx=None): - self.store[uri] = content - - async def rm(self, uri, recursive=False, ctx=None, lock_handle=None): - self.rm_calls.append((uri, recursive)) - self.store.pop(uri, None) - - async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): - prefix = uri.rstrip("/") + "/" - return [ - { - "uri": item_uri, - "rel_path": item_uri.removeprefix(prefix), - "isDir": False, - } - for item_uri in list(self.store) - if item_uri.startswith(prefix) - ] - - -@pytest.fixture -def request_context(): - return RequestContext( - user=UserIdentifier("acct", "ryoma"), - role=Role.USER, - ) - - -def _managed_memory(uri: str, resource_uri: str, index: int) -> str: - return MemoryFileUtils.write( - MemoryFile( - uri=uri, - content=f"用户上传了一张角色{index}的照片。", - memory_type="entities", - extra_fields={ - "category": "动漫角色", - "name": f"角色{index}", - RESOURCE_LINK_MANAGED_FIELD: True, - "resource_refs": [ - { - "resource_uri": resource_uri, - "reason": f"这是角色{index}的照片", - "source": "add_resource.reason", - "created_at": f"2026-06-11T00:00:{index:02d}+00:00", - } - ], - }, - ) - ) - - -def _aggregate_memory(uri: str, resource_uri: str, item_count: int = 10) -> str: - 
return MemoryFileUtils.write( - MemoryFile( - uri=uri, - content="用户保存过一组全球地标风景照片。", - memory_type=RESOURCE_LINK_MEMORY_TYPE, - extra_fields={ - "topic": "全球地标风景照片", - "resource_link_state": {"item_count": item_count}, - "resource_refs": [ - { - "resource_uri": resource_uri, - "source": "resource_link.compaction", - } - ], - }, - ) - ) - - -@pytest.mark.asyncio -async def test_compact_if_needed_writes_aggregate_and_deletes_managed_inputs( - request_context, - monkeypatch, -): - store = {} - for index in range(10): - memory_uri = f"viking://user/ryoma/memories/entities/动漫角色/角色{index}.md" - resource_uri = f"viking://resources/images/2026/06/11/role_{index}" - store[memory_uri] = _managed_memory(memory_uri, resource_uri, index) - - fake_fs = _FakeVikingFS(store) - compactor = ResourceLinkMemoryCompactor(viking_fs=fake_fs) - compactor._call_model = AsyncMock( - return_value=( - '{"memories":[{"title":"动漫角色照片",' - '"content":"用户上传过一组动漫角色照片,代表资源包括' - '[角色0](viking://resources/images/2026/06/11/role_0)。",' - '"resource_uris":["viking://resources/images/2026/06/11/role_0"],' - '"item_count":10}]}' - ) - ) - refresh_embedding = AsyncMock(return_value=True) - refresh_overview = AsyncMock() - monkeypatch.setattr( - "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_file_embedding", - refresh_embedding, - ) - monkeypatch.setattr( - "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_schema_overview", - refresh_overview, - ) - - result = await compactor.compact_if_needed(ctx=request_context) - - aggregate_uri = "viking://user/ryoma/memories/resource_link_memories/动漫角色照片.md" - assert result["status"] == "success" - assert result["written_uris"] == [aggregate_uri] - assert len(result["deleted_uris"]) == 10 - assert aggregate_uri in store - assert all("memories/entities/动漫角色/角色" not in uri for uri in store) - - aggregate = MemoryFileUtils.read(store[aggregate_uri], uri=aggregate_uri) - assert aggregate.memory_type == 
RESOURCE_LINK_MEMORY_TYPE - assert aggregate.extra_fields["topic"] == "动漫角色照片" - assert aggregate.extra_fields["resource_link_state"]["item_count"] == 10 - assert aggregate.extra_fields["resource_refs"][0]["resource_uri"].endswith("/role_0") - refresh_embedding.assert_awaited_once() - refresh_overview.assert_awaited() - - -@pytest.mark.asyncio -async def test_compact_if_needed_skips_below_threshold(request_context): - store = {} - for index in range(9): - memory_uri = f"viking://user/ryoma/memories/entities/动漫角色/角色{index}.md" - resource_uri = f"viking://resources/images/2026/06/11/role_{index}" - store[memory_uri] = _managed_memory(memory_uri, resource_uri, index) - - compactor = ResourceLinkMemoryCompactor(viking_fs=_FakeVikingFS(store)) - compactor._call_model = AsyncMock() - - result = await compactor.compact_if_needed(ctx=request_context) - - assert result == { - "status": "skipped", - "reason": "below_threshold", - "single_count": 9, - "aggregate_count": 0, - "total_memory_count": 9, - } - compactor._call_model.assert_not_called() - - -@pytest.mark.asyncio -async def test_compact_if_needed_counts_existing_aggregates_toward_threshold( - request_context, - monkeypatch, -): - store = {} - aggregate_uri = "viking://user/ryoma/memories/resource_link_memories/全球地标风景照片.md" - store[aggregate_uri] = _aggregate_memory( - aggregate_uri, - "viking://resources/images/2026/06/11/landmark_0", - item_count=10, - ) - for index in range(9): - memory_uri = f"viking://user/ryoma/memories/entities/照片资源/风景{index}.md" - resource_uri = f"viking://resources/images/2026/06/11/scene_{index}" - store[memory_uri] = _managed_memory(memory_uri, resource_uri, index) - - fake_fs = _FakeVikingFS(store) - compactor = ResourceLinkMemoryCompactor(viking_fs=fake_fs) - compactor._call_model = AsyncMock( - return_value=( - '{"memories":[{"title":"风景照片集合",' - '"content":"用户保存过一组风景照片,代表资源包括' - '[风景0](viking://resources/images/2026/06/11/scene_0)。",' - 
'"resource_uris":["viking://resources/images/2026/06/11/scene_0"],' - '"item_count":19}]}' - ) - ) - monkeypatch.setattr( - "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_file_embedding", - AsyncMock(return_value=True), - ) - monkeypatch.setattr( - "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_schema_overview", - AsyncMock(), - ) - - result = await compactor.compact_if_needed(ctx=request_context) - - assert result["status"] == "success" - assert aggregate_uri in result["deleted_uris"] - compactor._call_model.assert_awaited_once() - - -@pytest.mark.asyncio -async def test_mark_managed_memories_marks_only_memory_files(request_context): - memory_uri = "viking://user/ryoma/memories/entities/动漫角色/越前龙马.md" - resource_uri = "viking://resources/images/2026/06/11/yueqian_jpeg" - store = { - memory_uri: MemoryFileUtils.write( - MemoryFile( - uri=memory_uri, - content=f"用户上传了一张[越前龙马]({resource_uri})的照片。", - memory_type="entities", - extra_fields={ - "category": "动漫角色", - "name": "越前龙马", - "resource_refs": [{"resource_uri": resource_uri}], - }, - ) - ) - } - compactor = ResourceLinkMemoryCompactor(viking_fs=_FakeVikingFS(store)) - - marked = await compactor.mark_managed_memories( - ctx=request_context, - memory_uris=[memory_uri, "viking://resources/images/2026/06/11/yueqian_jpeg"], - created_at="2026-06-11T00:00:00+00:00", - ) - - assert marked == [memory_uri] - mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) - assert mf.extra_fields[RESOURCE_LINK_MANAGED_FIELD] is True - assert mf.extra_fields["resource_link_source"] == "add_resource.reason" - assert mf.extra_fields["resource_link_created_at"] == "2026-06-11T00:00:00+00:00" - - -def test_clean_title_removes_upload_date_and_user_prefix(): - title = ResourceLinkMemoryCompactor._clean_title( - "2026年6月11日用户上传的全球知名地标风景照片合集", - 1, - ) - - assert title == "全球知名地标风景照片合集" - assert len(title) <= 24 - - -@pytest.mark.asyncio -async def 
test_write_aggregate_memories_truncates_long_content(request_context, monkeypatch): - fake_fs = _FakeVikingFS({}) - compactor = ResourceLinkMemoryCompactor(viking_fs=fake_fs) - monkeypatch.setattr( - "openviking.service.resource_link_memory_compactor.MemoryUpdater.refresh_file_embedding", - AsyncMock(return_value=True), - ) - long_content = "用户保存了一组风景照片。" + ("很长的补充信息" * 100) - - written = await compactor._write_aggregate_memories( - ctx=request_context, - aggregate_dir_uri="viking://user/ryoma/memories/resource_link_memories", - response=_CompactionResponse( - memories=[ - _CompactedMemory( - title="风景照片集合", - content=long_content, - resource_uris=["viking://resources/images/2026/06/11/scene_0"], - item_count=10, - ) - ] - ), - input_item_count=10, - ) - - mf = MemoryFileUtils.read(fake_fs.store[written[0]], uri=written[0]) - assert len(mf.content) <= 360 - assert mf.content.endswith("...") diff --git a/tests/service/test_resource_memory_link_service.py b/tests/service/test_resource_memory_link_service.py index 09856ae1cd..4ebe912aca 100644 --- a/tests/service/test_resource_memory_link_service.py +++ b/tests/service/test_resource_memory_link_service.py @@ -2,16 +2,12 @@ # SPDX-License-Identifier: AGPL-3.0 """Tests for resource-memory linking service.""" -from types import SimpleNamespace from unittest.mock import AsyncMock import pytest from openviking.server.identity import RequestContext, Role -from openviking.service.resource_memory_link_service import ( - ResourceMemoryLinkService, - _ResourceLinkingProvider, -) +from openviking.service.resource_memory_link_service import ResourceMemoryLinkService from openviking.session.memory.dataclass import MemoryFile from openviking.session.memory.memory_updater import MemoryUpdateResult from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils @@ -55,18 +51,46 @@ async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): return [{"uri": memory_uri, "rel_path": "entities/wang.md", "isDir": 
False}] -class _FakeCompactor: +class _FakeSession: def __init__(self): - self.marked = None - self.enqueued = False + self.messages = [] - async def mark_managed_memories(self, **kwargs): - self.marked = kwargs - return list(kwargs["memory_uris"]) + def add_messages(self, specs): + self.messages.extend(specs) - async def enqueue_check(self, **kwargs): - self.enqueued = True - return "msg-1" + +class _FakeSessionService: + def __init__(self): + self.session = _FakeSession() + self.created = [] + self.committed = [] + self.deleted = [] + + async def create(self, ctx, session_id=None, memory_policy=None): + self.created.append( + { + "ctx": ctx, + "session_id": session_id, + "memory_policy": memory_policy, + } + ) + return self.session + + async def commit_async(self, session_id, ctx, keep_recent_count=0): + self.committed.append( + { + "ctx": ctx, + "session_id": session_id, + "keep_recent_count": keep_recent_count, + } + ) + return { + "task_id": None, + "archive_uri": f"viking://user/alice/sessions/{session_id}/history/archive_001", + } + + async def delete(self, session_id, ctx): + self.deleted.append({"ctx": ctx, "session_id": session_id}) @pytest.fixture @@ -78,57 +102,15 @@ def request_context(): @pytest.mark.asyncio -async def test_append_resource_refs_stores_only_memory_metadata(request_context): - memory_uri = "viking://user/alice/memories/entities/wang.md" - resource_uri = "viking://resources/id_card.pdf" - store = {memory_uri: "王大锤的身份证资料。\n"} - service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) - - await service._append_resource_refs( - memory_uris=[memory_uri], - resource_uri=resource_uri, - reason="这是王大锤的身份证", - ctx=request_context, - ) - - mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) - assert mf.extra_fields["resource_refs"][0]["resource_uri"] == resource_uri - assert mf.extra_fields["resource_refs"][0]["source"] == "add_resource.reason" - assert mf.extra_fields["resource_refs"][0]["match_text"] == "王大锤" - assert 
mf.links == [] - assert f"[王大锤]({resource_uri})" in store[memory_uri] - assert resource_uri not in store - - -@pytest.mark.asyncio -async def test_on_resource_added_marks_new_memories_for_compaction(request_context): - memory_uri = "viking://user/alice/memories/entities/动漫角色/越前龙马.md" +async def test_on_resource_added_bridges_reason_through_temporary_session(request_context): resource_uri = "viking://resources/images/2026/06/11/yueqian_jpeg" - store = { - memory_uri: MemoryFileUtils.write( - MemoryFile( - uri=memory_uri, - content="用户上传了一张越前龙马的照片。", - memory_type="entities", - extra_fields={"category": "动漫角色", "name": "越前龙马"}, - ) - ) - } - compactor = _FakeCompactor() + session_service = _FakeSessionService() service = ResourceMemoryLinkService( - viking_fs=_FakeVikingFS(store), - compactor=compactor, - ) - service._run_extract_loop = AsyncMock( - return_value=( - SimpleNamespace(upsert_operations=[object()], delete_file_contents=[], errors=[]), - object(), - object(), - ) + viking_fs=_FakeVikingFS( + {"viking://resources/images/2026/06/11/.abstract.md": "动漫角色照片合集"} + ), + session_service=session_service, ) - update_result = MemoryUpdateResult() - update_result.add_written(memory_uri) - service._apply_memory_operations = AsyncMock(return_value=update_result) result = await service.on_resource_added( ctx=request_context, @@ -137,84 +119,33 @@ async def test_on_resource_added_marks_new_memories_for_compaction(request_conte source_name="yueqian.jpeg", ) + session_id = result["session_id"] assert result["status"] == "success" - assert result["managed_memory_uris"] == [memory_uri] - assert result["compaction_msg_id"] == "msg-1" - assert compactor.enqueued is True - assert compactor.marked["memory_uris"] == [memory_uri] - assert compactor.marked["created_at"] - - -def test_resource_linking_provider_detects_language_from_reason_not_resource_uri(): - provider = _ResourceLinkingProvider( - resource_uri="viking://resources/images/2026/06/10/yueqian_jpeg", - 
reason="这是越前龙马的照片", - source_name="yueqian.jpeg", - ) - - assert provider.get_output_language() == "zh-CN" - - -def test_resource_linking_provider_exposes_resource_uri_only_as_metadata(): - resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg" - provider = _ResourceLinkingProvider( - resource_uri=resource_uri, - reason="这是越前龙马的照片", - source_name="yueqian.jpeg", - added_at="2026-06-11T08:00:00+00:00", - resource_abstract="动漫角色照片合集", - ) - - message_text = "\n".join( - part.text - for message in provider.messages - for part in message.parts - if getattr(part, "text", None) - ) - - instruction = provider.instruction() - assert resource_uri in instruction - assert resource_uri in provider._build_conversation_message()["content"] - assert resource_uri in provider.get_conversation_text() + assert session_id.startswith("resource_reason_") + assert session_service.created == [ + { + "ctx": request_context, + "session_id": session_id, + "memory_policy": { + "self": {"enabled": True}, + "peer": {"enabled": False}, + "memory_types": ["entities", "events", "preferences"], + }, + } + ] + assert session_service.committed == [ + { + "ctx": request_context, + "session_id": session_id, + "keep_recent_count": 0, + } + ] + assert session_service.deleted == [{"ctx": request_context, "session_id": session_id}] + message_text = session_service.session.messages[0]["parts"][0].text assert resource_uri in message_text - assert "2026-06-11T08:00:00+00:00" in instruction - assert "动漫角色照片合集" in instruction - assert ( - "Added at: 2026-06-11T08:00:00+00:00" - in provider._build_conversation_message()["content"] - ) - assert "Resource abstract: 动漫角色照片合集" in message_text - assert "include the exact Resource URI in the visible memory content" not in instruction - assert "Use the Resource URI only as resource identity metadata" in instruction - assert "Do NOT include raw resource URIs" in instruction - - -def test_resource_linking_prompt_prefers_natural_sentence_over_terse_label(): - 
provider = _ResourceLinkingProvider( - resource_uri="viking://resources/reports/gdp_pdf", - reason="这个 PDF 第 65 页的人均 GDP 数据应为 4 万", - source_name="gdp.pdf", - ) - - instruction = provider.instruction() - assert "Create/edit visible memory as durable natural sentences" in instruction - assert "user intent/judgment" in instruction - assert "rewrite terse resource labels" in instruction - assert 'reason "page 3 total should be 42"' in instruction - assert '"User said page 3 total should be 42"' in instruction - assert "merge with it" in instruction - assert "only the newest resource" in instruction - assert "enumerate/count resources" in instruction - assert "under 12 Chinese characters" in instruction - assert "under 8 English words" in instruction - assert "weak supporting context" in instruction - assert "short resource descriptor only" in instruction - assert "adds non-redundant readability" in instruction - assert "Source name alone is opaque" in instruction - assert "配置服务项目" in instruction - assert "merely repeats the subject, media type, or facts" in instruction - assert "角色照片" not in instruction - assert "身份证" not in instruction + assert "这是越前龙马的照片" in message_text + assert "yueqian.jpeg" in message_text + assert "动漫角色照片合集" in message_text @pytest.mark.asyncio @@ -262,97 +193,6 @@ async def test_read_resource_directory_abstract_ignores_missing_or_not_ready( assert not_ready == "" -@pytest.mark.asyncio -async def test_append_resource_refs_linkifies_memory_entity_name_and_removes_plain_uri( - request_context, -): - memory_uri = "viking://user/ryoma/memories/entities/fictional_character/越前龙马.md" - resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg" - raw = MemoryFileUtils.write( - MemoryFile( - uri=memory_uri, - content=f"用户上传了一张越前龙马的照片,资源 URI:{resource_uri}", - extra_fields={ - "category": "fictional_character", - "name": "越前龙马", - "memory_type": "entities", - }, - ) - ) - store = {memory_uri: raw} - service = 
ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) - - await service._append_resource_refs( - memory_uris=[memory_uri], - resource_uri=resource_uri, - reason="这是越前龙马的照片", - ctx=request_context, - ) - - written = store[memory_uri] - assert f"[越前龙马]({resource_uri})" in written - assert f"资源 URI:{resource_uri}" not in written - mf = MemoryFileUtils.read(written, uri=memory_uri) - assert mf.extra_fields["resource_refs"][0]["match_text"] == "越前龙马" - assert mf.links == [] - - -@pytest.mark.asyncio -async def test_append_resource_refs_removes_colon_visible_uri_with_markdown_escape( - request_context, -): - memory_uri = "viking://user/ryoma/memories/entities/fictional_character/越前龙马.md" - resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg" - visible_uri = "viking://resources/images/2026/06/10/yueqian\\_jpeg" - raw = MemoryFileUtils.write( - MemoryFile( - uri=memory_uri, - content=f"- 越前龙马的照片资源:{visible_uri}", - extra_fields={ - "category": "fictional_character", - "name": "越前龙马", - "memory_type": "entities", - }, - ) - ) - store = {memory_uri: raw} - service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) - - await service._append_resource_refs( - memory_uris=[memory_uri], - resource_uri=resource_uri, - reason="这是越前龙马的照片", - ctx=request_context, - ) - - mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) - assert mf.content == f"- [越前龙马]({resource_uri})的照片资源" - assert visible_uri not in mf.content - - -@pytest.mark.asyncio -async def test_append_resource_refs_falls_back_to_first_sentence_when_anchor_missing( - request_context, -): - memory_uri = "viking://user/ryoma/memories/entities/fictional_character/越前龙马.md" - resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg" - store = {memory_uri: "用户上传了一张角色照片。后续句子不应被链接。"} - service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) - - await service._append_resource_refs( - memory_uris=[memory_uri], - resource_uri=resource_uri, - reason="这是越前龙马的照片", - 
ctx=request_context, - ) - - written = store[memory_uri] - assert f"[用户上传了一张角色照片。]({resource_uri})" in written - assert "后续句子不应被链接。" in written - mf = MemoryFileUtils.read(written, uri=memory_uri) - assert mf.extra_fields["resource_refs"][0]["match_text"] == "用户上传了一张角色照片。" - - @pytest.mark.asyncio async def test_find_referencing_memories_uses_memory_refs(request_context): memory_uri = "viking://user/alice/memories/entities/wang.md" diff --git a/tests/session/memory/test_memory_react_system_prompt.py b/tests/session/memory/test_memory_react_system_prompt.py index 874a98228c..7ec195873f 100644 --- a/tests/session/memory/test_memory_react_system_prompt.py +++ b/tests/session/memory/test_memory_react_system_prompt.py @@ -165,3 +165,43 @@ def test_detect_language_prefers_user_text_over_assistant_text(self): provider = SessionExtractContextProvider(messages=messages) assert provider._detect_language() == "zh-CN" + + def test_detect_language_ignores_resource_uri_latin_segments(self): + messages = [ + Message( + id="m1", + role="user", + parts=[ + TextPart( + "这是越前龙马的照片 " + "viking://resources/images/2026/06/12/yueqian_jpeg" + ) + ], + ) + ] + + provider = SessionExtractContextProvider(messages=messages) + + assert provider._detect_language() == "zh-CN" + + def test_detect_language_uses_resource_reason_not_metadata_labels(self): + messages = [ + Message( + id="m1", + role="user", + parts=[ + TextPart( + "## Resource Addition\n" + "Resource URI: viking://resources/images/2026/06/12/yueqian_jpeg\n" + "Source name: yueqian.jpeg\n" + "Added at: 2026-06-11T17:26:21.332768+00:00\n" + "Resource abstract: high-quality anime-style illustration\n" + "User reason: 这是越前龙马的照片" + ) + ], + ) + ] + + provider = SessionExtractContextProvider(messages=messages) + + assert provider._detect_language() == "zh-CN" diff --git a/tests/session/memory/test_memory_updater.py b/tests/session/memory/test_memory_updater.py index 00fbaacf7c..ae4df726c7 100644 --- 
a/tests/session/memory/test_memory_updater.py +++ b/tests/session/memory/test_memory_updater.py @@ -353,9 +353,13 @@ async def test_apply_operations_skips_link_updates_for_deleted_uris(self, monkey updater.generate_overview = AsyncMock() mock_viking_fs = MagicMock() - mock_viking_fs.read_file = AsyncMock( - side_effect=AssertionError("deleted URI should not be read") - ) + + async def mock_read_file(uri, **kwargs): + if uri == deleted_uri: + raise AssertionError("deleted URI should not be read") + return MemoryFileUtils.write(MemoryFile(uri=uri, content="new content")) + + mock_viking_fs.read_file = AsyncMock(side_effect=mock_read_file) mock_viking_fs.write_file = AsyncMock() updater._get_viking_fs = MagicMock(return_value=mock_viking_fs) @@ -394,7 +398,9 @@ async def mock_apply_delete(uri, ctx): assert result.written_uris == [written_uri] assert result.deleted_uris == [deleted_uri] - mock_viking_fs.read_file.assert_not_awaited() + assert deleted_uri not in [ + call.args[0] for call in mock_viking_fs.read_file.await_args_list + ] @pytest.mark.asyncio async def test_apply_operations_routes_backlinks_to_matching_uri_only(self): @@ -537,6 +543,134 @@ async def mock_write_file(uri, content, **kwargs): assert memory["links"][0]["to_uri"] == resource_uri assert resource_uri not in store + @pytest.mark.asyncio + async def test_apply_operations_syncs_markdown_resource_refs_before_vectorize(self): + memory_uri = "viking://user/alice/memories/entities/fuji.md" + resource_uri = "viking://resources/images/2026/06/11/fuji_jpeg" + + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + filename_template="{{ name }}.md", + fields=[], + overview_template="overview", + ) + registry = MagicMock() + registry.get.return_value = schema + + store = {} + mock_viking_fs = MagicMock() + + async def mock_read_file(uri, **kwargs): + return store.get(uri) + + async def mock_write_file(uri, content, 
**kwargs): + store[uri] = content + + async def assert_vectorized_after_resource_ref_sync(*args, **kwargs): + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.extra_fields["resource_refs"][0]["source"] == "session.commit" + + mock_viking_fs.read_file = mock_read_file + mock_viking_fs.write_file = mock_write_file + + updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=mock_viking_fs) + updater._vectorize_memories = AsyncMock( + side_effect=assert_vectorized_after_resource_ref_sync + ) + updater.generate_overview = AsyncMock() + + operations = ResolvedOperations( + upsert_operations=[ + ResolvedOperation( + memory_fields={ + "name": "不二周助", + "content": f"用户保存了一张[不二周助]({resource_uri})的照片", + }, + memory_type="entities", + uris=[memory_uri], + page_id=100, + ) + ], + delete_file_contents=[], + errors=[], + ) + ctx = RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.apply_operations(operations=operations, ctx=ctx) + + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.content == f"用户保存了一张[不二周助]({resource_uri})的照片" + assert mf.links == [] + assert mf.extra_fields["resource_refs"] == [ + { + "resource_uri": resource_uri, + "source": "session.commit", + "created_at": mf.extra_fields["resource_refs"][0]["created_at"], + "match_text": "不二周助", + } + ] + + @pytest.mark.asyncio + async def test_apply_operations_linkifies_bare_resource_uri(self): + memory_uri = "viking://user/alice/memories/entities/fuji.md" + resource_uri = "viking://resources/images/2026/06/11/fuji_jpeg" + + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + filename_template="{{ name }}.md", + fields=[], + overview_template="overview", + ) + registry = MagicMock() + registry.get.return_value = schema + + store = {} + mock_viking_fs = MagicMock() + + async def mock_read_file(uri, **kwargs): 
+ return store.get(uri) + + async def mock_write_file(uri, content, **kwargs): + store[uri] = content + + mock_viking_fs.read_file = mock_read_file + mock_viking_fs.write_file = mock_write_file + + updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=mock_viking_fs) + updater._vectorize_memories = AsyncMock() + updater.generate_overview = AsyncMock() + + operations = ResolvedOperations( + upsert_operations=[ + ResolvedOperation( + memory_fields={ + "name": "不二周助", + "content": f"今天是清明节。用户保存了一张不二周助的照片 {resource_uri}", + }, + memory_type="entities", + uris=[memory_uri], + page_id=100, + ) + ], + delete_file_contents=[], + errors=[], + ) + ctx = RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.apply_operations(operations=operations, ctx=ctx) + + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.content == f"今天是清明节。[用户保存了一张不二周助的照片]({resource_uri})" + assert mf.extra_fields["resource_refs"][0]["resource_uri"] == resource_uri + assert mf.extra_fields["resource_refs"][0]["source"] == "session.commit" + assert mf.extra_fields["resource_refs"][0]["match_text"] == "用户保存了一张不二周助的照片" + # The TestApplyWriteWithContentInFields tests are outdated because WriteOp no longer exists # The _apply_write method now accepts any flat model (dict or Pydantic model) that From 04cafc0b003b64fa68df860e304852ea6288f448 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Fri, 12 Jun 2026 01:51:12 +0800 Subject: [PATCH 08/19] =?UTF-8?q?=E5=9B=9E=E6=BB=9Avlm=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../models/vlm/backends/volcengine_vlm.py | 30 +++++++------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/openviking/models/vlm/backends/volcengine_vlm.py b/openviking/models/vlm/backends/volcengine_vlm.py index cda87cd662..34eab984e3 100644 --- a/openviking/models/vlm/backends/volcengine_vlm.py +++ 
b/openviking/models/vlm/backends/volcengine_vlm.py @@ -31,16 +31,6 @@ def __init__(self, config: Dict[str, Any]): if not self.model: self.model = "doubao-seed-2-0-pro-260215" - def _ark_client_kwargs(self) -> Dict[str, Any]: - return { - "api_key": self.api_key, - "base_url": self.api_base, - "timeout": self.timeout, - # OpenViking owns retry/backoff behavior. Disable SDK retries to - # keep request duration bounded by the configured timeout. - "max_retries": 0, - } - def _parse_tool_calls(self, message) -> List[ToolCall]: """Parse tool calls from VolcEngine response message.""" tool_calls = [] @@ -88,7 +78,10 @@ def get_client(self): raise ImportError( "Please install volcenginesdkarkruntime: pip install volcenginesdkarkruntime" ) - self._sync_client = volcenginesdkarkruntime.Ark(**self._ark_client_kwargs()) + self._sync_client = volcenginesdkarkruntime.Ark( + api_key=self.api_key, + base_url=self.api_base, + ) return self._sync_client def _build_async_client(self): @@ -99,12 +92,9 @@ def _build_async_client(self): raise ImportError( "Please install volcenginesdkarkruntime: pip install volcenginesdkarkruntime" ) - return volcenginesdkarkruntime.AsyncArk(**self._ark_client_kwargs()) - - async def _create_chat_completion_async(self, client: Any, kwargs: Dict[str, Any]) -> Any: - return await asyncio.wait_for( - client.chat.completions.create(**kwargs), - timeout=self.timeout, + return volcenginesdkarkruntime.AsyncArk( + api_key=self.api_key, + base_url=self.api_base, ) def get_completion( @@ -177,7 +167,7 @@ async def get_completion_async( for attempt in range(self.max_retries + 1): try: t0 = time.perf_counter() - response = await self._create_chat_completion_async(client, kwargs) + response = await client.chat.completions.create(**kwargs) elapsed = time.perf_counter() - t0 self._update_token_usage_from_response(response, duration_seconds=elapsed) result = self._build_vlm_response(response, has_tools=bool(tools)) @@ -389,10 +379,10 @@ async def 
get_vision_completion_async( client = self.get_async_client() t0 = time.perf_counter() - response = await self._create_chat_completion_async(client, kwargs) + response = await client.chat.completions.create(**kwargs) elapsed = time.perf_counter() - t0 self._update_token_usage_from_response(response, duration_seconds=elapsed) result = self._build_vlm_response(response, has_tools=bool(tools)) if tools: return result - return self._clean_response(str(result)) + return self._clean_response(str(result)) \ No newline at end of file From 05e3a30165104444afdf832bfb5486e9bff4b9f5 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Fri, 12 Jun 2026 01:56:02 +0800 Subject: [PATCH 09/19] =?UTF-8?q?=E5=9B=9E=E6=BB=9Arust=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/ov_cli/src/base_client.rs | 7 ++----- crates/ov_cli/src/help_ui.rs | 6 +----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/crates/ov_cli/src/base_client.rs b/crates/ov_cli/src/base_client.rs index 15ebb9f6b4..f63887aff1 100644 --- a/crates/ov_cli/src/base_client.rs +++ b/crates/ov_cli/src/base_client.rs @@ -145,7 +145,6 @@ pub struct BaseClient { pub(crate) api_key: Option, pub(crate) account: Option, pub(crate) user: Option, - timeout_secs: f64, pub(crate) profile_enabled: bool, pub(crate) extra_headers: Option>, } @@ -171,7 +170,6 @@ impl BaseClient { api_key, account, user, - timeout_secs, profile_enabled, extra_headers, } @@ -360,8 +358,7 @@ impl BaseClient { timeout: std::time::Duration, ) -> Result { let url = format!("{}{}", self.base_url, path); - let configured_timeout = std::time::Duration::from_secs_f64(self.timeout_secs); - let client = self.create_client_with_timeout(std::cmp::max(timeout, configured_timeout))?; + let client = self.create_client_with_timeout(timeout)?; let request = client.post(&url).headers(self.build_headers()).json(body); let request = if self.profile_enabled { @@ -881,4 +878,4 @@ impl<'a> 
FileUploader<'a> { .map(|s| s.to_string()) .ok_or_else(|| Error::Parse("Missing temp_file_id in response".to_string())) } -} +} \ No newline at end of file diff --git a/crates/ov_cli/src/help_ui.rs b/crates/ov_cli/src/help_ui.rs index 8cda47a8a9..6dc25b532a 100644 --- a/crates/ov_cli/src/help_ui.rs +++ b/crates/ov_cli/src/help_ui.rs @@ -385,10 +385,6 @@ const COMMAND_HELP_SPECS: &[CommandHelpSpec] = &[ label: "--wait", description: "Wait until indexing/processing completes.", }, - HelpItem { - label: "--timeout ", - description: "Maximum wait time when using --wait.", - }, HelpItem { label: "--include / --exclude", description: "Filter files during folder import.", @@ -3328,4 +3324,4 @@ mod tests { assert!(rendered.contains("ov ls [uri]")); assert!(rendered.contains("List resources under a Viking URI.")); } -} +} \ No newline at end of file From bde7e94f54f3c968a47311188615b827374aa67a Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Fri, 12 Jun 2026 12:07:12 +0800 Subject: [PATCH 10/19] bug fix --- .../prompts/templates/memory/events.yaml | 5 + .../processing/resource_unlinking.yaml | 53 ---- .../service/resource_memory_link_service.py | 256 ++++-------------- openviking/session/memory/memory_updater.py | 124 +++++++++ .../session_extract_context_provider.py | 2 + .../session/memory/utils/resource_refs.py | 211 ++++++++++++++- tests/server/test_content_write_service.py | 24 ++ .../test_resource_memory_link_service.py | 129 +++++---- tests/session/memory/test_memory_updater.py | 35 +++ 9 files changed, 528 insertions(+), 311 deletions(-) delete mode 100644 openviking/prompts/templates/processing/resource_unlinking.yaml diff --git a/openviking/prompts/templates/memory/events.yaml b/openviking/prompts/templates/memory/events.yaml index a66e916e1a..1cb6c1f9ff 100644 --- a/openviking/prompts/templates/memory/events.yaml +++ b/openviking/prompts/templates/memory/events.yaml @@ -93,9 +93,14 @@ enabled: true # upsert 表示新增或更新(默认行为) operation_mode: "add_only" 
content_template: | + {% set resource_event_content = extract_context.get_resource_event_content(ranges, summary) %} + {% if resource_event_content %} + {{ resource_event_content }} + {% else %} Summary: {{ summary }} {{extract_context.get_first_message_time_with_weekday_from_ranges(ranges|default(''))|default('N/A')}} ChatLog: {{ extract_context.get_event_content(ranges, summary, 0) }} + {% endif %} embedding_template: |- EventName: {{ event_name }} Goal: {{ goal }} diff --git a/openviking/prompts/templates/processing/resource_unlinking.yaml b/openviking/prompts/templates/processing/resource_unlinking.yaml deleted file mode 100644 index c870ed87b7..0000000000 --- a/openviking/prompts/templates/processing/resource_unlinking.yaml +++ /dev/null @@ -1,53 +0,0 @@ -metadata: - id: "processing.resource_unlinking" - name: "Resource Unlinking" - description: "Remove resource-derived content from user memories before resource deletion" - version: "1.0.0" - language: "en" - category: "processing" - -variables: - - name: "output_language" - type: "string" - description: "Target language for memory content" - required: true - - - name: "memory_uri" - type: "string" - description: "Memory URI to clean" - required: true - - - name: "resource_uri" - type: "string" - description: "Deleted resource URI" - required: true - - - name: "reason" - type: "string" - description: "Original add-resource reason" - required: true - -template: | - You are a memory cleanup agent for a deleted resource. - - ## Objective - Remove only the memory content that was introduced because of the deleted resource. - - ## Target Output Language - All remaining memory content MUST be written in {{ output_language }}. - - ## Deleted Resource - Resource URI: {{ resource_uri }} - Original add-resource reason: {{ reason }} - Memory URI to clean: {{ memory_uri }} - - ## Rules - - Use the preloaded memory content from the read result. 
- - Remove the exact Resource URI and any content that exists only because of that resource/reason. - - Preserve unrelated user memories. - - If the whole memory is only about the deleted resource, delete the memory. - - Do NOT mention that the resource was deleted in the cleaned memory unless that fact is independently worth remembering. - - Return only the necessary edit/delete memory operations. - -llm_config: - temperature: 0.0 diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py index 542986eca0..d419bf691b 100644 --- a/openviking/service/resource_memory_link_service.py +++ b/openviking/service/resource_memory_link_service.py @@ -15,25 +15,24 @@ from uuid import uuid4 from openviking.core.namespace import canonical_user_root, context_type_for_uri -from openviking.message import Message from openviking.message.part import TextPart -from openviking.prompts.manager import render_prompt from openviking.server.identity import RequestContext -from openviking.session.memory.dataclass import MemoryFile, ResolvedOperations -from openviking.session.memory.extract_loop import ExtractLoop -from openviking.session.memory.memory_isolation_handler import MemoryIsolationHandler +from openviking.session.memory.dataclass import MemoryFile from openviking.session.memory.memory_updater import ( - ExtractContext, MemoryUpdater, MemoryUpdateResult, ) -from openviking.session.memory.session_extract_context_provider import SessionExtractContextProvider from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking.session.memory.utils.resource_refs import ( + content_references_resource, + extract_resource_uris, + remove_resource_references_from_memory, + resource_ref_matches, +) from openviking.storage import VikingDBManager from openviking.storage.viking_fs import VikingFS, get_viking_fs from openviking_cli.exceptions import NotFoundError -from openviking_cli.utils import VikingURI, 
get_logger -from openviking_cli.utils.config import get_openviking_config +from openviking_cli.utils import get_logger if TYPE_CHECKING: from openviking.service.session_service import SessionService @@ -56,77 +55,6 @@ class _MemoryRefMatch: resource_ref: Dict[str, Any] -class _ResourceUnlinkingProvider(SessionExtractContextProvider): - """Provider for removing resource-derived content from one memory file.""" - - def __init__( - self, - *, - memory_uri: str, - resource_uri: str, - reason: str, - memory_file: MemoryFile, - **kwargs: Any, - ): - self.memory_uri = memory_uri - self.resource_uri = resource_uri - self.reason = reason - self.memory_file = memory_file - messages = [ - Message( - id="resource-unlinking", - role="user", - parts=[ - TextPart( - text=( - "Deleted resource URI: " - f"{resource_uri}\nOriginal reason: {reason}\n" - f"Memory URI: {memory_uri}" - ) - ) - ], - ) - ] - super().__init__(messages=messages, **kwargs) - - def instruction(self) -> str: - return render_prompt( - "processing.resource_unlinking", - { - "output_language": self.get_output_language(), - "memory_uri": self.memory_uri, - "resource_uri": self.resource_uri, - "reason": self.reason, - }, - ) - - async def prefetch(self) -> List[Dict[str, Any]]: - messages = [ - { - "role": "user", - "content": ( - "## Resource Deletion Cleanup\n" - f"Deleted resource URI: {self.resource_uri}\n" - f"Original add-resource reason: {self.reason}\n" - f"Memory to clean: {self.memory_uri}\n\n" - "Use the preloaded memory content below. Output the cleanup operation " - "as a single JSON response." 
- ), - } - ] - await self._append_structured_read_result(messages, 0, self.memory_uri) - return messages - - def get_tools(self) -> List[str]: - return [] - - def _build_prefetch_search_query(self) -> str: - return self.reason - - def get_conversation_text(self) -> str: - return f"{self.reason}\n{self.resource_uri}\n{self.memory_uri}".strip() - - class ResourceMemoryLinkService: """Create and clean memory references for resources added with a reason.""" @@ -393,53 +321,6 @@ async def before_resource_delete( "deleted_memory_uris": list(dict.fromkeys(deleted)), } - async def _run_extract_loop( - self, - *, - provider: SessionExtractContextProvider, - ctx: RequestContext, - allowed_memory_types: Optional[set[str]] = None, - ) -> tuple[Optional[ResolvedOperations], ExtractContext, MemoryIsolationHandler]: - config = get_openviking_config() - vlm = config.vlm.get_vlm_instance() - viking_fs = self._get_viking_fs() - extract_context = provider.get_extract_context() - isolation_handler = MemoryIsolationHandler( - ctx, - extract_context, - allowed_memory_types=allowed_memory_types, - ) - provider._isolation_handler = isolation_handler - orchestrator = ExtractLoop( - vlm=vlm, - viking_fs=viking_fs, - ctx=ctx, - context_provider=provider, - isolation_handler=isolation_handler, - ) - operations, _ = await orchestrator.run() - return operations, extract_context, isolation_handler - - async def _apply_memory_operations( - self, - *, - provider: SessionExtractContextProvider, - operations: ResolvedOperations, - ctx: RequestContext, - extract_context: ExtractContext, - isolation_handler: MemoryIsolationHandler, - ) -> MemoryUpdateResult: - updater = MemoryUpdater( - registry=provider._get_registry(), - vikingdb=self._vikingdb, - ) - return await updater.apply_operations( - operations, - ctx, - extract_context=extract_context, - isolation_handler=isolation_handler, - ) - async def _cleanup_memory_reference( self, *, @@ -449,58 +330,32 @@ async def _cleanup_memory_reference( 
resource_uri: str, reason: str, ) -> MemoryUpdateResult: - memory_type = self._infer_memory_type(memory_uri, memory_file) - provider = _ResourceUnlinkingProvider( - memory_uri=memory_uri, - resource_uri=resource_uri, - reason=reason, - memory_file=memory_file, - ctx=ctx, - viking_fs=self._get_viking_fs(), - ) - operations, extract_context, isolation_handler = await self._run_extract_loop( - provider=provider, - ctx=ctx, - allowed_memory_types={memory_type} if memory_type else None, - ) - if not operations: - return MemoryUpdateResult() - result = await self._apply_memory_operations( - provider=provider, - operations=operations, - ctx=ctx, - extract_context=extract_context, - isolation_handler=isolation_handler, + del reason + viking_fs = self._get_viking_fs() + current = memory_file + try: + raw = await viking_fs.read_file(memory_uri, ctx=ctx) + current = MemoryFileUtils.read(raw, uri=memory_uri) + except (NotFoundError, FileNotFoundError): + result = MemoryUpdateResult() + result.add_deleted(memory_uri) + return result + + changed = remove_resource_references_from_memory( + current, + resource_uri, + recursive=True, ) - for uri in result.written_uris + result.edited_uris: - await self._remove_resource_refs(uri, resource_uri, ctx) - if uri == memory_uri: - await self._restore_cleanup_metadata(uri, memory_file, ctx) - if await self._delete_empty_cleanup_memory(uri, ctx): - self._mark_result_deleted(result, uri) + result = MemoryUpdateResult() + if not changed: + return result + + await viking_fs.write_file(memory_uri, MemoryFileUtils.write(current), ctx=ctx) + result.add_edited(memory_uri) + if await self._delete_empty_cleanup_memory(memory_uri, ctx): + self._mark_result_deleted(result, memory_uri) return result - async def _restore_cleanup_metadata( - self, - memory_uri: str, - original_memory_file: MemoryFile, - ctx: RequestContext, - ) -> None: - """Keep resource cleanup from introducing schema metadata.""" - viking_fs = self._get_viking_fs() - raw = await 
viking_fs.read_file(memory_uri, ctx=ctx) - mf = MemoryFileUtils.read(raw, uri=memory_uri) - original_extra_keys = set((original_memory_file.extra_fields or {}).keys()) - mf.extra_fields = { - key: value for key, value in mf.extra_fields.items() if key in original_extra_keys - } - mf.memory_type = original_memory_file.memory_type - if not original_memory_file.links: - mf.links = [] - if not original_memory_file.backlinks: - mf.backlinks = [] - await viking_fs.write_file(memory_uri, MemoryFileUtils.write(mf), ctx=ctx) - async def _delete_empty_cleanup_memory(self, memory_uri: str, ctx: RequestContext) -> bool: """Delete memory files whose visible content was emptied by resource cleanup.""" if context_type_for_uri(memory_uri) != "memory": @@ -585,6 +440,29 @@ async def _find_referencing_memories( for ref in self._coerce_resource_refs(mf.extra_fields.get("resource_refs")): if self._resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive): matches.append(_MemoryRefMatch(uri, mf, ref)) + if not any(match.memory_uri == uri for match in matches) and content_references_resource( + mf.content, + resource_uri, + recursive=recursive, + ): + matched_uri = next( + ( + item + for item in extract_resource_uris(mf.content) + if self._resource_ref_matches(item, resource_uri, recursive) + ), + resource_uri, + ) + matches.append( + _MemoryRefMatch( + uri, + mf, + { + "resource_uri": matched_uri, + "source": "visible_content", + }, + ) + ) return matches async def _read_resource_directory_abstract( @@ -675,28 +553,4 @@ def _resource_ref_matches( target_uri: str, recursive: bool, ) -> bool: - if not isinstance(ref_uri, str) or not ref_uri: - return False - normalized_ref = ref_uri.rstrip("/") - normalized_target = target_uri.rstrip("/") - if normalized_ref == normalized_target: - return True - return recursive and normalized_ref.startswith(normalized_target + "/") - - @staticmethod - def _infer_memory_type(memory_uri: str, memory_file: MemoryFile) -> str: - memory_type = ( 
- memory_file.memory_type - or memory_file.extra_fields.get("memory_type") - or "" - ) - if memory_type: - return str(memory_type) - parts = [part for part in VikingURI.normalize(memory_uri).split("/") if part] - try: - idx = parts.index("memories") - except ValueError: - return "" - if len(parts) > idx + 1: - return parts[idx + 1].replace(".md", "") - return "" + return resource_ref_matches(ref_uri, target_uri, recursive=recursive) diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 3ba3aa1c7c..2590a5f638 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -46,6 +46,14 @@ _EXTRACTION_CHUNK_MIN_CHARS = 100 _EXTRACTION_CHUNK_BOUNDARY_RE = re.compile(r"(\n+|[。!?;!?;]+|(? str: + """Return a user-readable event body for add-resource derived events.""" + if not ranges_str: + return "" + additions = self._resource_additions_from_ranges(ranges_str) + if not additions: + return "" + addition = additions[0] + resource_uri = addition.get("Resource URI", "") + if not resource_uri: + return "" + return self._link_resource_summary(summary or "", resource_uri, addition).strip() + + def _resource_additions_from_ranges(self, ranges_str: str) -> List[Dict[str, str]]: + msg_range = self.read_message_ranges(ranges_str) + additions: List[Dict[str, str]] = [] + for msg_group in msg_range.elements: + for msg in msg_group: + text = self._message_text(msg) + if "## Resource Addition" not in text: + continue + fields = { + match.group(1): match.group(2).strip() + for match in _RESOURCE_ADDITION_FIELD_RE.finditer(text) + } + if fields.get("Resource URI"): + additions.append(fields) + return additions + + @staticmethod + def _message_text(message: Message) -> str: + parts = getattr(message, "parts", []) + texts = [part.text for part in parts if isinstance(part, TextPart) and part.text] + if texts: + return "\n".join(texts) + return message.content or "" + + @classmethod + def 
_link_resource_summary( + cls, + summary: str, + resource_uri: str, + addition: Dict[str, str], + ) -> str: + text = (summary or "").strip() + if not text: + return cls._resource_addition_fallback_sentence(resource_uri, addition) + if f"]({resource_uri})" in text: + return text + if resource_uri in text: + return cls._replace_bare_resource_uri(text, resource_uri, addition) + label = cls._resource_label_from_addition(addition) + return cls._finish_sentence(f"{text.rstrip('。.!')},关联资源为[{label}]({resource_uri})") + + @classmethod + def _replace_bare_resource_uri( + cls, + text: str, + resource_uri: str, + addition: Dict[str, str], + ) -> str: + uri_start = text.find(resource_uri) + if uri_start < 0: + return text + prefix = text[:uri_start] + suffix = text[uri_start + len(resource_uri) :] + marker = _RESOURCE_URI_MARKER_RE.search(prefix) + if marker: + visible_prefix = prefix[: marker.start()].rstrip(",,;;:: ") + label = cls._resource_clause_from_summary_prefix(visible_prefix) + if not label: + label = cls._resource_label_from_addition(addition) + if label and visible_prefix.endswith(label): + visible_prefix = visible_prefix[: -len(label)] + f"[{label}]({resource_uri})" + else: + visible_prefix = f"{visible_prefix}[{label}]({resource_uri})" + return cls._finish_sentence(visible_prefix) + + label = cls._resource_label_from_addition(addition) + return cls._finish_sentence(f"{prefix.rstrip()}[{label}]({resource_uri}){suffix.strip()}") + + @staticmethod + def _resource_clause_from_summary_prefix(prefix: str) -> str: + text = prefix.strip(",,;;:: ") + tail = re.split(r"[,,;;。.!??]", text)[-1].strip() + return tail if 0 < len(tail) <= 120 else "" + + @classmethod + def _resource_label_from_addition(cls, addition: Dict[str, str]) -> str: + reason = addition.get("User reason", "").strip() + for prefix in ("这是一张", "这是一个", "该资源是", "这个是", "这是"): + if reason.startswith(prefix): + reason = reason[len(prefix) :].strip() + break + reason = reason.strip("。.!! 
") + if reason: + return reason[:80] + source_name = addition.get("Source name", "").strip() + return source_name or "相关资源" + + @classmethod + def _resource_addition_fallback_sentence( + cls, + resource_uri: str, + addition: Dict[str, str], + ) -> str: + label = cls._resource_label_from_addition(addition) + return f"用户保存了[{label}]({resource_uri})。" + + @staticmethod + def _finish_sentence(text: str) -> str: + text = text.strip(",,;;:: ") + if text.endswith(("。", ".", "!", "!", "?", "?")): + return text + return text + "。" + def read_message_ranges(self, ranges_str: str) -> "MessageRange": """Parse ranges string like "0-10,50-60" or "7,9,11,13" and return combined MessageRange. diff --git a/openviking/session/memory/session_extract_context_provider.py b/openviking/session/memory/session_extract_context_provider.py index 1a0a5159d5..33d65830a7 100644 --- a/openviking/session/memory/session_extract_context_provider.py +++ b/openviking/session/memory/session_extract_context_provider.py @@ -170,6 +170,8 @@ def instruction(self) -> str: ## Resource URI Handling - If the conversation contains a `viking://resources/...` URI and the user says a durable fact, judgment, preference, or event about it, extract that memory into the appropriate normal memory type such as entities, events, or preferences. - Preserve resource references as markdown links in visible memory content when useful. Example: user said "用户保存了越前龙马照片 viking://resources/images/ryoma" -> write "用户保存了[越前龙马照片](viking://resources/images/ryoma)". +- For `## Resource Addition` blocks, use `User reason` as the user's intent and `Resource abstract` only as optional context. Do not copy raw fields such as `Resource URI`, `Added at`, `Resource abstract`, or `User reason` into visible memory content. +- Use descriptive link text such as `[越前龙马照片](viking://resources/...)`; avoid visible wording like `资源URI为` or `Resource URI`. 
- If the user already wrote `[text](viking://resources/...)`, keep the same resource link intent. - Do NOT claim you inspected, summarized, OCRed, or opened the resource file unless the conversation explicitly provides that fact. diff --git a/openviking/session/memory/utils/resource_refs.py b/openviking/session/memory/utils/resource_refs.py index 222b4db45d..d2ba690373 100644 --- a/openviking/session/memory/utils/resource_refs.py +++ b/openviking/session/memory/utils/resource_refs.py @@ -6,7 +6,7 @@ import re from datetime import datetime, timezone -from typing import Any, Dict, List, Optional, Sequence +from typing import Any, Dict, Iterable, List, Optional, Sequence from openviking.session.memory.dataclass import MemoryFile @@ -20,6 +20,13 @@ _TRAILING_URI_PUNCTUATION = ".,;:!?,。;:!?" _SENTENCE_BOUNDARIES = "。!?.!?\n" _MAX_LINKIFIED_SENTENCE_CHARS = 160 +_RESOURCE_CLEANUP_ARTIFACT_LINE_RE = re.compile( + r"(?m)^(?:None ChatLog:|Original reason:\s*|Memory URI:\s*viking://user/[^\n]*)\n?" 
+) +_RESOURCE_URI_MARKER_RE = re.compile( + r"[,,;;::\s]*(?:资源\s*URI\s*为|资源\s*URI|Resource\s+URI)\s*[::为]?\s*$", + re.IGNORECASE, +) def sync_memory_resource_refs( @@ -58,6 +65,96 @@ def coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: return _coerce_resource_refs(value) +def content_references_resource( + content: str, + resource_uri: str, + *, + recursive: bool = False, +) -> bool: + """Return whether visible memory content references a resource URI.""" + return any( + resource_ref_matches(uri, resource_uri, recursive=recursive) + for uri in extract_resource_uris(content) + ) + + +def extract_resource_uris(content: str) -> List[str]: + """Extract visible resource URIs from markdown links or bare URI text.""" + uris: List[str] = [] + for match in _MARKDOWN_RESOURCE_LINK_RE.finditer(content or ""): + uri = _trim_resource_uri(match.group(2).strip()) + if uri: + uris.append(uri) + for match in _RESOURCE_URI_RE.finditer(content or ""): + uri = _trim_resource_uri(match.group(0)) + if uri: + uris.append(uri) + return list(dict.fromkeys(uris)) + + +def remove_resource_references_from_memory( + mf: MemoryFile, + resource_uri: str, + *, + recursive: bool = False, +) -> bool: + """Remove visible references and MEMORY_FIELDS.resource_refs for one resource.""" + before_content = mf.content + before_refs = _coerce_resource_refs(mf.extra_fields.get("resource_refs")) + + mf.content = remove_resource_references_from_content( + mf.content, + resource_uri, + recursive=recursive, + ) + refs = [ + ref + for ref in before_refs + if not resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive=recursive) + ] + if refs: + mf.extra_fields["resource_refs"] = refs + else: + mf.extra_fields.pop("resource_refs", None) + + return before_content != mf.content or before_refs != refs + + +def remove_resource_references_from_content( + content: str, + resource_uri: str, + *, + recursive: bool = False, +) -> str: + """Remove sentences/list lines that contain matching resource 
URI references.""" + text = content or "" + spans = _matching_resource_reference_spans(text, resource_uri, recursive=recursive) + if not spans: + return text + + sentence_spans = _merge_spans( + _expand_to_sentence_span(text, start, end) for start, end in spans + ) + for start, end in reversed(sentence_spans): + text = text[:start] + text[end:] + return _normalize_removed_reference_text(text) + + +def resource_ref_matches( + ref_uri: Any, + target_uri: str, + *, + recursive: bool, +) -> bool: + if not isinstance(ref_uri, str) or not ref_uri: + return False + normalized_ref = _trim_resource_uri(ref_uri).rstrip("/") + normalized_target = _trim_resource_uri(target_uri).rstrip("/") + if normalized_ref == normalized_target: + return True + return recursive and normalized_ref.startswith(normalized_target + "/") + + def _protected_code_spans(content: str) -> List[tuple[int, int]]: spans = [(match.start(), match.end()) for match in _CODE_BLOCK_RE.finditer(content or "")] spans.extend((match.start(), match.end()) for match in _INLINE_CODE_RE.finditer(content or "")) @@ -110,13 +207,25 @@ def _linkify_bare_resource_uris( sentence_start, sentence_end = sentence_span if end > covered_start: continue - anchor = updated[sentence_start:sentence_end] + anchor_start = sentence_start + anchor_end = sentence_end + anchor = updated[anchor_start:anchor_end] + marker_span = _resource_uri_marker_span(anchor) + if marker_span: + label_span = _resource_clause_span_before_marker( + updated, + sentence_start, + sentence_start + marker_span[0], + ) + if label_span: + anchor_start, anchor_end = label_span + anchor = updated[anchor_start:anchor_end] if "viking://resources/" in anchor or "](" in anchor: continue refs[-1]["match_text"] = anchor replacement = f"[{anchor}]({resource_uri})" - updated = updated[:sentence_start] + replacement + updated[end:] - covered_start = sentence_start + updated = updated[:anchor_start] + replacement + updated[end:] + covered_start = anchor_start refs.reverse() 
return updated, refs @@ -221,6 +330,100 @@ def _trim_resource_uri(resource_uri: str) -> str: return (resource_uri or "").rstrip(_TRAILING_URI_PUNCTUATION) +def _matching_resource_reference_spans( + content: str, + resource_uri: str, + *, + recursive: bool, +) -> List[tuple[int, int]]: + spans: List[tuple[int, int]] = [] + markdown_spans: List[tuple[int, int]] = [] + for match in _MARKDOWN_RESOURCE_LINK_RE.finditer(content or ""): + markdown_spans.append((match.start(), match.end())) + if resource_ref_matches(match.group(2), resource_uri, recursive=recursive): + spans.append((match.start(), match.end())) + + for match in _RESOURCE_URI_RE.finditer(content or ""): + resource_end = match.start() + len(_trim_resource_uri(match.group(0))) + if _overlaps_spans(match.start(), resource_end, markdown_spans): + continue + if resource_ref_matches(match.group(0), resource_uri, recursive=recursive): + spans.append((match.start(), resource_end)) + return spans + + +def _resource_uri_marker_span(anchor: str) -> Optional[tuple[int, int]]: + match = _RESOURCE_URI_MARKER_RE.search(anchor) + if not match: + return None + return match.start(), match.end() + + +def _resource_clause_span_before_marker( + content: str, + sentence_start: int, + marker_start: int, +) -> Optional[tuple[int, int]]: + prefix = content[sentence_start:marker_start].rstrip(",,;;:: ") + if not prefix: + return None + + pieces = list(re.finditer(r"[^,,;;。.!??]+$", prefix)) + if not pieces: + return None + label_start = sentence_start + pieces[-1].start() + label_end = sentence_start + pieces[-1].end() + if _valid_resource_clause(content[label_start:label_end]): + return label_start, label_end + return None + + +def _valid_resource_clause(clause: str) -> bool: + clause = clause.strip() + return bool(clause) and len(clause) <= 120 and "\n" not in clause and "](" not in clause + + +def _expand_to_sentence_span(content: str, start: int, end: int) -> tuple[int, int]: + span_start = start + for idx in range(start - 1, 
-1, -1): + if content[idx] in _SENTENCE_BOUNDARIES: + span_start = idx + 1 + break + else: + span_start = 0 + + span_end = end + for idx in range(end, len(content)): + if content[idx] in _SENTENCE_BOUNDARIES: + span_end = idx + 1 + break + else: + span_end = len(content) + + while span_start < span_end and content[span_start].isspace(): + span_start += 1 + while span_end < len(content) and content[span_end].isspace(): + span_end += 1 + return span_start, span_end + + +def _merge_spans(spans: Iterable[tuple[int, int]]) -> List[tuple[int, int]]: + merged: List[tuple[int, int]] = [] + for start, end in sorted(spans): + if not merged or start > merged[-1][1]: + merged.append((start, end)) + else: + merged[-1] = (merged[-1][0], max(merged[-1][1], end)) + return merged + + +def _normalize_removed_reference_text(content: str) -> str: + content = _RESOURCE_CLEANUP_ARTIFACT_LINE_RE.sub("", content) + text = re.sub(r"[ \t]+([,。;:!?,.!?;:])", r"\1", content) + text = re.sub(r"\n{3,}", "\n\n", text) + return text.strip() + + def _overlaps_spans( start: int, end: int, diff --git a/tests/server/test_content_write_service.py b/tests/server/test_content_write_service.py index 86945311a0..898b9c6f5f 100644 --- a/tests/server/test_content_write_service.py +++ b/tests/server/test_content_write_service.py @@ -168,6 +168,30 @@ async def test_memory_write_linkifies_bare_resource_uri_previous_sentence(servic assert mf.links == [] +@pytest.mark.asyncio +async def test_memory_write_linkifies_resource_uri_marker_with_readable_anchor(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_uri = f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + resource_uri = "viking://resources/images/2026/06/12/yueqian_jpeg" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write( + memory_uri, + content=f"2026-06-12,用户保存了粉丝创作的越前龙马动漫插画资源,资源URI为{resource_uri}。", + ctx=ctx, + mode="replace", + ) + + stored = await 
service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + assert mf.content == f"2026-06-12,[用户保存了粉丝创作的越前龙马动漫插画资源]({resource_uri})。" + refs = mf.extra_fields["resource_refs"] + assert refs[0]["resource_uri"] == resource_uri + assert refs[0]["source"] == "content.write" + assert refs[0]["match_text"] == "用户保存了粉丝创作的越前龙马动漫插画资源" + assert mf.links == [] + + @pytest.mark.asyncio async def test_memory_write_ignores_resource_uri_in_inline_code(service): ctx = RequestContext(user=service.user, role=Role.USER) diff --git a/tests/service/test_resource_memory_link_service.py b/tests/service/test_resource_memory_link_service.py index 4ebe912aca..36efd05abd 100644 --- a/tests/service/test_resource_memory_link_service.py +++ b/tests/service/test_resource_memory_link_service.py @@ -273,32 +273,6 @@ async def test_cleanup_memory_reference_does_not_introduce_schema_metadata(reque ) store = {memory_uri: original_raw} service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) - service._run_extract_loop = AsyncMock(return_value=(object(), object(), object())) - - async def fake_apply_memory_operations(**kwargs): - store[memory_uri] = MemoryFileUtils.write( - MemoryFile( - uri=memory_uri, - content="今天是清明节。", - memory_type="entities", - extra_fields={ - "category": "anime_character", - "name": "不二周助", - "user_id": "ryoma", - "resource_refs": [ - { - "resource_uri": resource_uri, - "source": "content.write", - } - ], - }, - ) - ) - result = MemoryUpdateResult() - result.add_edited(memory_uri) - return result - - service._apply_memory_operations = AsyncMock(side_effect=fake_apply_memory_operations) result = await service._cleanup_memory_reference( ctx=request_context, @@ -336,39 +310,12 @@ async def test_cleanup_memory_reference_deletes_empty_memory_shell( ) store = {memory_uri: original_raw} service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) - service._run_extract_loop = AsyncMock(return_value=(object(), object(), 
object())) refresh_overview = AsyncMock() monkeypatch.setattr( "openviking.service.resource_memory_link_service.MemoryUpdater.refresh_schema_overview", refresh_overview, ) - async def fake_apply_memory_operations(**kwargs): - store[memory_uri] = MemoryFileUtils.write( - MemoryFile( - uri=memory_uri, - content="", - memory_type="entities", - extra_fields={ - "category": "动漫角色", - "name": "越前龙马", - "user_id": "ryoma", - "memory_type": "entities", - "resource_refs": [ - { - "resource_uri": resource_uri, - "source": "add_resource.reason", - } - ], - }, - ) - ) - result = MemoryUpdateResult() - result.add_edited(memory_uri) - return result - - service._apply_memory_operations = AsyncMock(side_effect=fake_apply_memory_operations) - result = await service._cleanup_memory_reference( ctx=request_context, memory_uri=memory_uri, @@ -384,6 +331,82 @@ async def fake_apply_memory_operations(**kwargs): refresh_overview.assert_awaited_once() +@pytest.mark.asyncio +async def test_before_resource_delete_cleans_visible_uri_without_resource_refs( + request_context, + monkeypatch, +): + memory_uri = "viking://user/alice/memories/events/2026/06/11/yueqian.md" + resource_uri = "viking://resources/images/2026/06/12/yueqian_jpeg" + raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=( + "今天是清明节。\n" + f"用户昨晚查看了[越前龙马照片]({resource_uri}),之后可参考该资源。" + ), + extra_fields={"memory_type": "events"}, + ) + ) + store = {memory_uri: raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + refresh_overview = AsyncMock() + monkeypatch.setattr( + "openviking.service.resource_memory_link_service.MemoryUpdater.refresh_schema_overview", + refresh_overview, + ) + + result = await service.before_resource_delete( + ctx=request_context, + resource_uri=resource_uri, + ) + + assert result["status"] == "success" + assert result["memory_uris"] == [memory_uri] + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.content == "今天是清明节。" + assert 
"resource_refs" not in mf.extra_fields + + +@pytest.mark.asyncio +async def test_before_resource_delete_deletes_previous_failed_cleanup_artifact( + request_context, + monkeypatch, +): + memory_uri = "viking://user/alice/memories/events/2026/06/11/yueqian.md" + resource_uri = "viking://resources/images/2026/06/12/yueqian_jpeg" + raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=( + f"Summary: 用户查看了[越前龙马照片]({resource_uri})。\n" + "None ChatLog:\n" + f"[[user]: Deleted resource URI:]({resource_uri})\n" + "Original reason: \n" + f"Memory URI: {memory_uri}" + ), + extra_fields={"memory_type": "events"}, + ) + ) + store = {memory_uri: raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + refresh_overview = AsyncMock() + monkeypatch.setattr( + "openviking.service.resource_memory_link_service.MemoryUpdater.refresh_schema_overview", + refresh_overview, + ) + + result = await service.before_resource_delete( + ctx=request_context, + resource_uri=resource_uri, + ) + + assert result["status"] == "success" + assert result["deleted_memory_uris"] == [memory_uri] + assert memory_uri not in store + refresh_overview.assert_awaited_once() + + @pytest.mark.asyncio async def test_assert_resource_unlinked_propagates_non_not_found_errors(request_context): service = ResourceMemoryLinkService(viking_fs=_ReadFailVikingFS()) diff --git a/tests/session/memory/test_memory_updater.py b/tests/session/memory/test_memory_updater.py index ae4df726c7..6af0278bf8 100644 --- a/tests/session/memory/test_memory_updater.py +++ b/tests/session/memory/test_memory_updater.py @@ -102,6 +102,41 @@ def test_extract_context_initializes_page_id_map(self): page_id = extract_context.page_id_map.get_page_id("viking://user/a/memories/profile.md") assert page_id == 1 + def test_extract_context_resource_event_content_hides_add_resource_fields(self): + resource_uri = "viking://resources/images/2026/06/12/yueqian_jpeg" + extract_context = ExtractContext( + messages=[ + Message( 
+ id="1", + role="user", + parts=[ + TextPart( + text=( + "## Resource Addition\n" + f"Resource URI: {resource_uri}\n" + "Source name: yueqian.jpeg\n" + "Added at: 2026-06-12T03:43:36.343325+00:00\n" + "Resource abstract: This directory contains an anime illustration.\n" + "User reason: 这是越前龙马的照片" + ) + ) + ], + created_at="2026-06-12T03:43:36.343325+00:00", + ) + ] + ) + + content = extract_context.get_resource_event_content( + "0", + f"2026-06-12,用户保存了粉丝创作的越前龙马动漫插画资源,资源URI为{resource_uri}。", + ) + + assert content == f"2026-06-12,[用户保存了粉丝创作的越前龙马动漫插画资源]({resource_uri})。" + assert "Resource URI" not in content + assert "Added at" not in content + assert "Resource abstract" not in content + assert "User reason" not in content + def test_create(self): """Test creating a MemoryUpdater.""" updater = MemoryUpdater() From 8e74f550f1c7181394ef1ef64ea2106c5f49a46f Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Fri, 12 Jun 2026 16:18:08 +0800 Subject: [PATCH 11/19] =?UTF-8?q?=E5=85=BC=E5=AE=B9peers,=20user=20?= =?UTF-8?q?=E4=BD=9C=E7=94=A8=E5=9F=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../session_extract_context_provider.py | 35 ++++++++++++++----- .../session/memory/utils/resource_refs.py | 20 +++++++++-- tests/server/test_content_write_service.py | 28 +++++++++++++++ .../memory/test_memory_react_system_prompt.py | 31 ++++++++++++++++ 4 files changed, 103 insertions(+), 11 deletions(-) diff --git a/openviking/session/memory/session_extract_context_provider.py b/openviking/session/memory/session_extract_context_provider.py index 33d65830a7..e2b6947d04 100644 --- a/openviking/session/memory/session_extract_context_provider.py +++ b/openviking/session/memory/session_extract_context_provider.py @@ -29,6 +29,7 @@ add_tool_call_pair_to_messages, get_tool, ) +from openviking.session.memory.utils.resource_refs import contains_resource_uri from openviking.session.memory.utils.uri import render_template from 
openviking.storage.viking_fs import VikingFS from openviking.telemetry import tracer @@ -147,8 +148,33 @@ def _language_signal_text(text: str, *, strip_language_detection_noise) -> str: def get_output_language(self) -> str: return self._output_language + def _conversation_contains_resource_uri(self) -> bool: + for message in self.messages or []: + content = getattr(message, "content", None) + if content and contains_resource_uri(content): + return True + for part in getattr(message, "parts", []) or []: + text = getattr(part, "text", None) + if text and contains_resource_uri(text): + return True + return False + def instruction(self) -> str: output_language = self._output_language + resource_uri_handling = ( + """ + +## Resource URI Handling +- If the conversation contains a resource URI (`viking://resources/...`, `viking://user/{user_id}/resources/...`, or `viking://user/{user_id}/peers/{peer_id}/resources/...`) and the user says a durable fact, judgment, preference, or event about it, extract that memory into the appropriate normal memory type such as entities, events, or preferences. +- Preserve resource references as markdown links in visible memory content when useful. Example: user said "用户保存了越前龙马照片 viking://resources/images/ryoma" -> write "用户保存了[越前龙马照片](viking://resources/images/ryoma)". +- For `## Resource Addition` blocks, use `User reason` as the user's intent and `Resource abstract` only as optional context. Do not copy raw fields such as `Resource URI`, `Added at`, `Resource abstract`, or `User reason` into visible memory content. +- Use descriptive link text such as `[越前龙马照片](viking://resources/...)`; avoid visible wording like `资源URI为` or `Resource URI`. +- If the user already wrote a markdown link to a resource URI, keep the same resource link intent. +- Do NOT claim you inspected, summarized, OCRed, or opened the resource file unless the conversation explicitly provides that fact. 
+""" + if self._conversation_contains_resource_uri() + else "" + ) goal = f"""You are a memory extraction agent. Your task is to analyze conversations and update memories. ## Workflow @@ -166,14 +192,7 @@ def instruction(self) -> str: ## URI Handling The system automatically generates URIs based on memory_type and fields. Just provide correct memory_type and fields. - -## Resource URI Handling -- If the conversation contains a `viking://resources/...` URI and the user says a durable fact, judgment, preference, or event about it, extract that memory into the appropriate normal memory type such as entities, events, or preferences. -- Preserve resource references as markdown links in visible memory content when useful. Example: user said "用户保存了越前龙马照片 viking://resources/images/ryoma" -> write "用户保存了[越前龙马照片](viking://resources/images/ryoma)". -- For `## Resource Addition` blocks, use `User reason` as the user's intent and `Resource abstract` only as optional context. Do not copy raw fields such as `Resource URI`, `Added at`, `Resource abstract`, or `User reason` into visible memory content. -- Use descriptive link text such as `[越前龙马照片](viking://resources/...)`; avoid visible wording like `资源URI为` or `Resource URI`. -- If the user already wrote `[text](viking://resources/...)`, keep the same resource link intent. -- Do NOT claim you inspected, summarized, OCRed, or opened the resource file unless the conversation explicitly provides that fact. +{resource_uri_handling} ## Self and Peer Memory When a memory item describes the current user, omit peer_id. 
diff --git a/openviking/session/memory/utils/resource_refs.py b/openviking/session/memory/utils/resource_refs.py index d2ba690373..48b10d6c10 100644 --- a/openviking/session/memory/utils/resource_refs.py +++ b/openviking/session/memory/utils/resource_refs.py @@ -13,8 +13,17 @@ RESOURCE_REF_SOURCE_CONTENT_WRITE = "content.write" RESOURCE_REF_SOURCE_SESSION_COMMIT = "session.commit" -_MARKDOWN_RESOURCE_LINK_RE = re.compile(r"\[([^\]\n]+)\]\((viking://resources/[^)\s]+)\)") -_RESOURCE_URI_RE = re.compile(r"viking://resources/[^\s<>\]\)\"']+") +_RESOURCE_URI_PATTERN = ( + r"viking://(?:" + r"resources(?:/[^\s<>\]\)\"']*)?" + r"|user/[^/\s<>\]\)\"']+/(?:" + r"resources(?:/[^\s<>\]\)\"']*)?" + r"|peers/[^/\s<>\]\)\"']+/resources(?:/[^\s<>\]\)\"']*)?" + r")" + r")" +) +_MARKDOWN_RESOURCE_LINK_RE = re.compile(rf"\[([^\]\n]+)\]\(({_RESOURCE_URI_PATTERN})\)") +_RESOURCE_URI_RE = re.compile(_RESOURCE_URI_PATTERN) _CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL) _INLINE_CODE_RE = re.compile(r"`[^`\n]+`") _TRAILING_URI_PUNCTUATION = ".,;:!?,。;:!?" 
@@ -65,6 +74,11 @@ def coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: return _coerce_resource_refs(value) +def contains_resource_uri(content: str) -> bool: + """Return whether text contains any supported resource URI form.""" + return bool(_RESOURCE_URI_RE.search(content or "")) + + def content_references_resource( content: str, resource_uri: str, @@ -220,7 +234,7 @@ def _linkify_bare_resource_uris( if label_span: anchor_start, anchor_end = label_span anchor = updated[anchor_start:anchor_end] - if "viking://resources/" in anchor or "](" in anchor: + if contains_resource_uri(anchor) or "](" in anchor: continue refs[-1]["match_text"] = anchor replacement = f"[{anchor}]({resource_uri})" diff --git a/tests/server/test_content_write_service.py b/tests/server/test_content_write_service.py index 898b9c6f5f..ee13babebb 100644 --- a/tests/server/test_content_write_service.py +++ b/tests/server/test_content_write_service.py @@ -144,6 +144,34 @@ async def test_memory_write_adds_resource_refs_for_markdown_resource_link(servic assert mf.links == [] +@pytest.mark.parametrize( + "resource_uri", + [ + "viking://user/test_user/resources/images/2026/06/10/yueqian_jpeg", + "viking://user/test_user/peers/fuji/resources/images/2026/06/10/yueqian_jpeg", + ], +) +@pytest.mark.asyncio +async def test_memory_write_adds_resource_refs_for_user_scoped_resource_links( + service, + resource_uri, +): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_uri = f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + content = f"用户上传了一张[越前龙马]({resource_uri})的照片" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write(memory_uri, content=content, ctx=ctx, mode="replace") + + stored = await service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + refs = mf.extra_fields["resource_refs"] + assert mf.content == content + assert refs[0]["resource_uri"] == resource_uri + assert 
refs[0]["source"] == "content.write" + assert refs[0]["match_text"] == "越前龙马" + + @pytest.mark.asyncio async def test_memory_write_linkifies_bare_resource_uri_previous_sentence(service): ctx = RequestContext(user=service.user, role=Role.USER) diff --git a/tests/session/memory/test_memory_react_system_prompt.py b/tests/session/memory/test_memory_react_system_prompt.py index 7ec195873f..191efd8a67 100644 --- a/tests/session/memory/test_memory_react_system_prompt.py +++ b/tests/session/memory/test_memory_react_system_prompt.py @@ -49,6 +49,37 @@ def test_instruction_explains_peer_memory_routing(self): assert "profile/preferences/entities/events" in instruction assert "cases/patterns/tools/skills" in instruction + def test_instruction_omits_resource_uri_handling_without_resource_uri(self): + provider = SessionExtractContextProvider( + messages=[Message(id="m1", role="user", parts=[TextPart("我喜欢越前龙马。")])] + ) + + instruction = provider.instruction() + + assert "Resource URI Handling" not in instruction + + def test_instruction_includes_resource_uri_handling_for_user_scoped_resource_uri(self): + provider = SessionExtractContextProvider( + messages=[ + Message( + id="m1", + role="user", + parts=[ + TextPart( + "这张图是越前龙马:" + "viking://user/ryoma/peers/fuji/resources/images/yueqian_jpeg" + ) + ], + ) + ] + ) + + instruction = provider.instruction() + + assert "Resource URI Handling" in instruction + assert "viking://user/{user_id}/resources/..." in instruction + assert "viking://user/{user_id}/peers/{peer_id}/resources/..." 
in instruction + class TestSkillToolCallExposure: def test_assemble_conversation_includes_skill_tool_call(self): From 79796d4fe2763bd60eaccda565376c57a6158e7a Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Fri, 12 Jun 2026 16:33:16 +0800 Subject: [PATCH 12/19] bug fix --- openviking/service/fs_service.py | 91 +++++++++++++------ tests/service/test_fs_service.py | 146 ++++++++++++++++++++++++++++++- 2 files changed, 209 insertions(+), 28 deletions(-) diff --git a/openviking/service/fs_service.py b/openviking/service/fs_service.py index 799b21fc67..92db4d5a9f 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -190,44 +190,81 @@ async def rm( context_type = context_type_for_uri(uri) refresh_parent_uri = self._semantic_refresh_parent_uri(uri, context_type) memory_overview_uri = self._memory_overview_parent_uri(uri, context_type) - if self._resource_memory_link_service and context_type == "resource": - cleanup_result = await self._resource_memory_link_service.before_resource_delete( - ctx=ctx, - resource_uri=uri, - recursive=recursive, - ) result = await viking_fs.rm(uri, recursive=recursive, ctx=ctx) queue_status = None - if refresh_parent_uri: - await self._enqueue_delete_refresh( - root_uri=refresh_parent_uri, - deleted_uri=uri, - context_type=context_type, - ctx=ctx, - ) - if wait: + request_registered = False + telemetry_id = get_current_telemetry().telemetry_id + try: + if refresh_parent_uri: + if wait and telemetry_id: + get_request_wait_tracker().register_request(telemetry_id) + request_registered = True + await self._enqueue_delete_refresh( + root_uri=refresh_parent_uri, + deleted_uri=uri, + context_type=context_type, + ctx=ctx, + ) + if self._resource_memory_link_service and context_type == "resource": + cleanup_result = await self._resource_memory_link_service.before_resource_delete( + ctx=ctx, + resource_uri=uri, + recursive=recursive, + ) + if memory_overview_uri: + await MemoryUpdater.refresh_schema_overview( + 
viking_fs=viking_fs, + directory_uri=memory_overview_uri, + ctx=ctx, + ) + for cleanup_overview_uri in self._memory_overview_parent_uris_from_cleanup( + cleanup_result + ): + await MemoryUpdater.refresh_schema_overview( + viking_fs=viking_fs, + directory_uri=cleanup_overview_uri, + ctx=ctx, + ) + if refresh_parent_uri and wait: queue_status = await self._wait_for_refresh(timeout=timeout) - if memory_overview_uri: - await MemoryUpdater.refresh_schema_overview( - viking_fs=viking_fs, - directory_uri=memory_overview_uri, - ctx=ctx, - ) - for cleanup_overview_uri in self._memory_overview_parent_uris_from_cleanup(cleanup_result): - await MemoryUpdater.refresh_schema_overview( - viking_fs=viking_fs, - directory_uri=cleanup_overview_uri, - ctx=ctx, - ) + finally: + if request_registered: + get_request_wait_tracker().cleanup(telemetry_id) if cleanup_result is not None and isinstance(result, dict): result["memory_cleanup"] = cleanup_result if refresh_parent_uri and isinstance(result, dict): result["semantic_root_uri"] = refresh_parent_uri - result["semantic_status"] = "complete" if wait else "queued" + result["semantic_status"] = self._semantic_refresh_status( + wait=wait, + queue_status=queue_status, + ) if queue_status is not None: result["queue_status"] = queue_status return result + @staticmethod + def _semantic_refresh_status( + *, + wait: bool, + queue_status: Optional[Dict[str, Any]], + ) -> str: + if not wait: + return "queued" + if not isinstance(queue_status, dict): + return "complete" + semantic = queue_status.get("Semantic", {}) + if not isinstance(semantic, dict): + return "complete" + try: + if int(semantic.get("error_count", 0) or 0) > 0: + return "failed" + except (TypeError, ValueError): + if semantic.get("errors"): + return "failed" + if semantic.get("errors"): + return "failed" + return "complete" + @staticmethod def _semantic_refresh_parent_uri(uri: str, context_type: str) -> Optional[str]: if context_type != "resource": diff --git 
a/tests/service/test_fs_service.py b/tests/service/test_fs_service.py index f1e3393d48..5e9db932cb 100644 --- a/tests/service/test_fs_service.py +++ b/tests/service/test_fs_service.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: AGPL-3.0 """Tests for file-system service coordination behavior.""" +from types import SimpleNamespace from unittest.mock import AsyncMock import pytest @@ -12,11 +13,14 @@ class _FakeVikingFS: - def __init__(self): + def __init__(self, *, rm_error=None): self.rm_calls = [] + self.rm_error = rm_error async def rm(self, uri, recursive=False, ctx=None): self.rm_calls.append({"uri": uri, "recursive": recursive, "ctx": ctx}) + if self.rm_error: + raise self.rm_error return {"estimated_deleted_count": 3} @@ -30,6 +34,56 @@ async def before_resource_delete(self, *, ctx, resource_uri, recursive=False): return self.result +class _FakeWaitTracker: + def __init__(self): + self.registered_requests = [] + self.registered_roots = [] + self.wait_calls = [] + self.cleaned = [] + + def register_request(self, telemetry_id): + self.registered_requests.append(telemetry_id) + + def register_semantic_root(self, telemetry_id, semantic_msg_id): + self.registered_roots.append( + { + "telemetry_id": telemetry_id, + "semantic_msg_id": semantic_msg_id, + "request_was_registered": telemetry_id in self.registered_requests, + } + ) + + async def wait_for_request(self, telemetry_id, timeout=None): + self.wait_calls.append((telemetry_id, timeout)) + + def build_queue_status(self, telemetry_id): + return { + "Semantic": {"processed": 1, "error_count": 0, "errors": []}, + "Embedding": {"processed": 0, "error_count": 0, "errors": []}, + } + + def mark_semantic_failed(self, telemetry_id, semantic_msg_id, message): + pass + + def cleanup(self, telemetry_id): + self.cleaned.append(telemetry_id) + + +class _FakeQueueManager: + SEMANTIC = "semantic" + + def __init__(self): + self.messages = [] + + def get_queue(self, name, allow_create=False): + assert name == self.SEMANTIC + 
assert allow_create is True + return self + + async def enqueue(self, msg): + self.messages.append(msg) + + @pytest.fixture def request_context(): return RequestContext( @@ -67,6 +121,33 @@ async def test_resource_rm_enqueues_parent_delete_refresh_and_waits(request_cont assert result["queue_status"] == {"Semantic": {"pending_count": 0}} +@pytest.mark.asyncio +async def test_resource_rm_reports_failed_semantic_status_when_wait_queue_has_errors( + request_context, +): + viking_fs = _FakeVikingFS() + service = FSService(viking_fs=viking_fs) + service._enqueue_delete_refresh = AsyncMock() + service._wait_for_refresh = AsyncMock( + return_value={ + "Semantic": { + "processed": 1, + "error_count": 1, + "errors": [{"message": "refresh failed"}], + } + } + ) + + result = await service.rm( + "viking://resources/images/2026/06/10/不二周助_jpeg", + ctx=request_context, + recursive=True, + wait=True, + ) + + assert result["semantic_status"] == "failed" + + @pytest.mark.asyncio async def test_resource_rm_without_wait_only_queues_refresh(request_context): viking_fs = _FakeVikingFS() @@ -82,6 +163,69 @@ async def test_resource_rm_without_wait_only_queues_refresh(request_context): assert result["semantic_status"] == "queued" +@pytest.mark.asyncio +async def test_resource_rm_wait_registers_request_before_semantic_root( + request_context, + monkeypatch, +): + viking_fs = _FakeVikingFS() + service = FSService(viking_fs=viking_fs) + tracker = _FakeWaitTracker() + queue_manager = _FakeQueueManager() + + monkeypatch.setattr( + "openviking.service.fs_service.get_current_telemetry", + lambda: SimpleNamespace(telemetry_id="tm-fs-rm"), + ) + monkeypatch.setattr( + "openviking.service.fs_service.get_request_wait_tracker", + lambda: tracker, + ) + monkeypatch.setattr( + "openviking.service.fs_service.get_queue_manager", + lambda: queue_manager, + ) + + result = await service.rm( + "viking://resources/images/2026/06/10/不二周助_jpeg", + ctx=request_context, + recursive=True, + wait=True, + timeout=3, 
+ ) + + assert tracker.registered_requests == ["tm-fs-rm"] + assert tracker.registered_roots + assert tracker.registered_roots[0]["request_was_registered"] is True + assert tracker.wait_calls == [("tm-fs-rm", 3)] + assert tracker.cleaned == ["tm-fs-rm"] + assert result["semantic_status"] == "complete" + + +@pytest.mark.asyncio +async def test_resource_rm_does_not_cleanup_memory_if_resource_delete_fails(request_context): + delete_error = RuntimeError("delete failed") + viking_fs = _FakeVikingFS(rm_error=delete_error) + cleanup = { + "status": "success", + "memory_uris": ["viking://user/ryoma/memories/entities/动漫角色/越前龙马.md"], + } + link_service = _FakeResourceMemoryLinkService(cleanup) + service = FSService( + viking_fs=viking_fs, + resource_memory_link_service=link_service, + ) + + with pytest.raises(RuntimeError, match="delete failed"): + await service.rm( + "viking://resources/images/2026/06/10/yueqian_jpeg", + ctx=request_context, + recursive=True, + ) + + assert link_service.calls == [] + + @pytest.mark.asyncio async def test_resource_rm_refreshes_memory_overview_for_cleaned_memories( request_context, From 33435f29e2ceb8f845e602ceb393c252e30c77bb Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Fri, 12 Jun 2026 16:54:28 +0800 Subject: [PATCH 13/19] bug fix --- .../service/resource_memory_link_service.py | 36 ++++++-- .../session/memory/utils/resource_refs.py | 11 ++- .../test_resource_memory_link_service.py | 39 ++++++++ tests/session/memory/test_resource_refs.py | 89 +++++++++++++++++++ 4 files changed, 165 insertions(+), 10 deletions(-) create mode 100644 tests/session/memory/test_resource_refs.py diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py index d419bf691b..30dcea3277 100644 --- a/openviking/service/resource_memory_link_service.py +++ b/openviking/service/resource_memory_link_service.py @@ -297,15 +297,26 @@ async def before_resource_delete( memory_file=first.memory_file, 
resource_uri=resource_uri, reason=reason, + recursive=recursive, ) cleaned.extend(cleanup_result.written_uris + cleanup_result.edited_uris) deleted.extend(cleanup_result.deleted_uris) if memory_uri in cleanup_result.deleted_uris: continue if not cleanup_result.has_changes(): - await self._remove_resource_refs(memory_uri, resource_uri, ctx) + await self._remove_resource_refs( + memory_uri, + resource_uri, + ctx, + recursive=recursive, + ) cleaned.append(memory_uri) - await self._assert_resource_unlinked(memory_uri, resource_uri, ctx) + await self._assert_resource_unlinked( + memory_uri, + resource_uri, + ctx, + recursive=recursive, + ) except NotFoundError: deleted.append(memory_uri) except Exception as exc: @@ -329,6 +340,7 @@ async def _cleanup_memory_reference( memory_file: MemoryFile, resource_uri: str, reason: str, + recursive: bool = False, ) -> MemoryUpdateResult: del reason viking_fs = self._get_viking_fs() @@ -344,7 +356,7 @@ async def _cleanup_memory_reference( changed = remove_resource_references_from_memory( current, resource_uri, - recursive=True, + recursive=recursive, ) result = MemoryUpdateResult() if not changed: @@ -390,6 +402,8 @@ async def _remove_resource_refs( memory_uri: str, resource_uri: str, ctx: RequestContext, + *, + recursive: bool, ) -> None: viking_fs = self._get_viking_fs() raw = await viking_fs.read_file(memory_uri, ctx=ctx) @@ -397,7 +411,11 @@ async def _remove_resource_refs( refs = [ ref for ref in self._coerce_resource_refs(mf.extra_fields.get("resource_refs")) - if not self._resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive=True) + if not self._resource_ref_matches( + ref.get("resource_uri"), + resource_uri, + recursive=recursive, + ) ] if refs: mf.extra_fields["resource_refs"] = refs @@ -518,16 +536,22 @@ async def _assert_resource_unlinked( memory_uri: str, resource_uri: str, ctx: RequestContext, + *, + recursive: bool = True, ) -> None: try: raw = await self._get_viking_fs().read_file(memory_uri, ctx=ctx) 
except (NotFoundError, FileNotFoundError) as exc: raise NotFoundError(memory_uri, "memory") from exc mf = MemoryFileUtils.read(raw, uri=memory_uri) - if resource_uri in (mf.content or ""): + if content_references_resource(mf.content, resource_uri, recursive=recursive): raise RuntimeError(f"memory content still contains deleted resource URI: {memory_uri}") for ref in self._coerce_resource_refs(mf.extra_fields.get("resource_refs")): - if self._resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive=True): + if self._resource_ref_matches( + ref.get("resource_uri"), + resource_uri, + recursive=recursive, + ): raise RuntimeError(f"memory still contains resource ref: {memory_uri}") @staticmethod diff --git a/openviking/session/memory/utils/resource_refs.py b/openviking/session/memory/utils/resource_refs.py index 48b10d6c10..d80e0d8eec 100644 --- a/openviking/session/memory/utils/resource_refs.py +++ b/openviking/session/memory/utils/resource_refs.py @@ -13,20 +13,23 @@ RESOURCE_REF_SOURCE_CONTENT_WRITE = "content.write" RESOURCE_REF_SOURCE_SESSION_COMMIT = "session.commit" +_RESOURCE_URI_PATH_CHARS = r"[^\s<>\]\)\"',。;:!?、,;:!?)】》]+" +_RESOURCE_URI_BOUNDARY = r"(?=$|[\s<>\]\)\"',。;:!?、,;:!?.)】》])" _RESOURCE_URI_PATTERN = ( r"viking://(?:" - r"resources(?:/[^\s<>\]\)\"']*)?" + rf"resources(?:/{_RESOURCE_URI_PATH_CHARS})?" r"|user/[^/\s<>\]\)\"']+/(?:" - r"resources(?:/[^\s<>\]\)\"']*)?" - r"|peers/[^/\s<>\]\)\"']+/resources(?:/[^\s<>\]\)\"']*)?" + rf"resources(?:/{_RESOURCE_URI_PATH_CHARS})?" + rf"|peers/[^/\s<>\]\)\"']+/resources(?:/{_RESOURCE_URI_PATH_CHARS})?" r")" r")" + rf"{_RESOURCE_URI_BOUNDARY}" ) _MARKDOWN_RESOURCE_LINK_RE = re.compile(rf"\[([^\]\n]+)\]\(({_RESOURCE_URI_PATTERN})\)") _RESOURCE_URI_RE = re.compile(_RESOURCE_URI_PATTERN) _CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL) _INLINE_CODE_RE = re.compile(r"`[^`\n]+`") -_TRAILING_URI_PUNCTUATION = ".,;:!?,。;:!?" 
+_TRAILING_URI_PUNCTUATION = ".,;:!?,。;:!?、)】》" _SENTENCE_BOUNDARIES = "。!?.!?\n" _MAX_LINKIFIED_SENTENCE_CHARS = 160 _RESOURCE_CLEANUP_ARTIFACT_LINE_RE = re.compile( diff --git a/tests/service/test_resource_memory_link_service.py b/tests/service/test_resource_memory_link_service.py index 36efd05abd..277b0ba716 100644 --- a/tests/service/test_resource_memory_link_service.py +++ b/tests/service/test_resource_memory_link_service.py @@ -368,6 +368,45 @@ async def test_before_resource_delete_cleans_visible_uri_without_resource_refs( assert "resource_refs" not in mf.extra_fields +@pytest.mark.asyncio +async def test_before_resource_delete_exact_keeps_child_resource_refs( + request_context, +): + memory_uri = "viking://user/alice/memories/entities/photos.md" + resource_uri = "viking://resources/images/album" + child_uri = f"{resource_uri}/child.jpeg" + raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=( + f"用户保存了[相册资源]({resource_uri})。\n" + f"用户保存了[相册里的子图]({child_uri})。" + ), + extra_fields={ + "resource_refs": [ + {"resource_uri": resource_uri, "source": "content.write"}, + {"resource_uri": child_uri, "source": "content.write"}, + ], + }, + ) + ) + store = {memory_uri: raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + + result = await service.before_resource_delete( + ctx=request_context, + resource_uri=resource_uri, + recursive=False, + ) + + assert result["status"] == "success" + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert f"[相册资源]({resource_uri})" not in mf.content + assert f"[相册里的子图]({child_uri})" in mf.content + refs = mf.extra_fields["resource_refs"] + assert refs == [{"resource_uri": child_uri, "source": "content.write"}] + + @pytest.mark.asyncio async def test_before_resource_delete_deletes_previous_failed_cleanup_artifact( request_context, diff --git a/tests/session/memory/test_resource_refs.py b/tests/session/memory/test_resource_refs.py new file mode 100644 index 0000000000..ae9933f112 
--- /dev/null +++ b/tests/session/memory/test_resource_refs.py @@ -0,0 +1,89 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 + +from openviking.session.memory.dataclass import MemoryFile +from openviking.session.memory.utils.resource_refs import ( + contains_resource_uri, + extract_resource_uris, + sync_memory_resource_refs, +) + + +def test_extract_resource_uris_stops_at_common_sentence_delimiters(): + cases = [ + ( + "看了 viking://resources/images/foo.jpeg,觉得不错", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg。还看了别的", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg;然后记录", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg!真的好", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg?真的好", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg、还有别的", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg)然后记录", + "viking://resources/images/foo.jpeg", + ), + ( + "read viking://resources/images/foo.jpeg, then commented", + "viking://resources/images/foo.jpeg", + ), + ] + + for content, expected in cases: + assert extract_resource_uris(content) == [expected] + + +def test_extract_resource_uris_does_not_match_resource_prefix_words(): + cases = [ + "viking://resources2/images/foo", + "viking://resources-old/images/foo", + "viking://user/alice/resources2/images/foo", + "viking://user/alice/resources-old/images/foo", + "viking://user/alice/peers/bob/resources2/images/foo", + "viking://user/alice/peers/bob/resources-old/images/foo", + ] + + for content in cases: + assert not contains_resource_uri(content) + assert extract_resource_uris(content) == [] + + +def test_sync_memory_resource_refs_keeps_bare_uri_clean_before_chinese_punctuation(): + resource_uri = 
"viking://resources/images/2026/06/12/yueqian_jpeg" + mf = MemoryFile( + content=( + f"昨天晚上我看了 {resource_uri},这张图是越前龙马的照片。" + "以后提到越前龙马照片,可以参考这个资源。" + ), + extra_fields={}, + ) + + changed = sync_memory_resource_refs(mf, source="session.commit") + + assert changed is True + assert f"]({resource_uri}),这张图是越前龙马的照片。" in mf.content + refs = mf.extra_fields["resource_refs"] + assert refs == [ + { + "resource_uri": resource_uri, + "source": "session.commit", + "created_at": refs[0]["created_at"], + "match_text": "昨天晚上我看了", + } + ] From 36d57b6a335e4cdcd273d9f6507218fd8d191632 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Mon, 15 Jun 2026 15:19:56 +0800 Subject: [PATCH 14/19] bug fix --- crates/ov_cli/src/help_ui.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/ov_cli/src/help_ui.rs b/crates/ov_cli/src/help_ui.rs index b2bc0557b5..d0ca11bb5a 100644 --- a/crates/ov_cli/src/help_ui.rs +++ b/crates/ov_cli/src/help_ui.rs @@ -289,8 +289,6 @@ const COMMAND_HELP_SPECS: &[CommandHelpSpec] = &[ }, ], subcommands: &[], -======= ->>>>>>> main next_steps: &[ HelpItem { label: "ov ls ", From a943ede67cbc91994ca1ecad2cf63c75b805a105 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Mon, 15 Jun 2026 15:26:21 +0800 Subject: [PATCH 15/19] bug fix --- crates/ov_cli/src/help_ui.rs | 22 +--------- crates/ov_cli/src/main.rs | 4 +- crates/ragfs/src/plugins/s3fs/client.rs | 58 ++++++++++++------------- 3 files changed, 33 insertions(+), 51 deletions(-) diff --git a/crates/ov_cli/src/help_ui.rs b/crates/ov_cli/src/help_ui.rs index d0ca11bb5a..1f726d79ce 100644 --- a/crates/ov_cli/src/help_ui.rs +++ b/crates/ov_cli/src/help_ui.rs @@ -270,25 +270,6 @@ const COMMAND_HELP_SPECS: &[CommandHelpSpec] = &[ description: "Remove a subtree and wait for generated overviews to refresh.", }, ], - arguments: &[HelpItem { - label: "", - description: "Resource URI to remove.", - }], - common_options: &[HelpItem { - label: "-r, --recursive", - description: "Required for directory/subtree 
removal.", - }], - advanced_options: &[ - HelpItem { - label: "--wait", - description: "Wait for semantic refresh after deletion.", - }, - HelpItem { - label: "--timeout ", - description: "Maximum time to wait with --wait.", - }, - ], - subcommands: &[], next_steps: &[ HelpItem { label: "ov ls ", @@ -2546,6 +2527,7 @@ mod tests { for args in [ ["ov", "add-resource", "--help"], ["ov", "add-skill", "--help"], + ["ov", "rm", "--help"], ["ov", "write", "--help"], ] { let rendered = strip_ansi( @@ -2856,4 +2838,4 @@ mod tests { assert!(rendered.contains("ov ls [OPTIONS] [uri]")); assert!(rendered.contains("List resources under a Viking URI.")); } -} \ No newline at end of file +} diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index 24269b53c0..e03389dc5e 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -435,10 +435,10 @@ enum Commands { #[arg(short, long, help_heading = "Common options")] recursive: bool, /// Wait until semantic refresh is complete - #[arg(long)] + #[arg(long, help_heading = "Common options")] wait: bool, /// Wait timeout in seconds (only used with --wait) - #[arg(long)] + #[arg(long, value_name = "seconds", help_heading = "Common options")] timeout: Option, }, /// [Data] Move or rename resource diff --git a/crates/ragfs/src/plugins/s3fs/client.rs b/crates/ragfs/src/plugins/s3fs/client.rs index a39178f0d7..7f468ccd43 100644 --- a/crates/ragfs/src/plugins/s3fs/client.rs +++ b/crates/ragfs/src/plugins/s3fs/client.rs @@ -4,9 +4,9 @@ //! Supports AWS S3 and S3-compatible services (MinIO, LocalStack, TOS). 
use crate::core::{ConfigValue, Error, Result}; -use aws_sdk_s3::error::SdkError; use aws_sdk_s3::config::{BehaviorVersion, Credentials, Region}; use aws_sdk_s3::error::ProvideErrorMetadata; +use aws_sdk_s3::error::SdkError; use aws_sdk_s3::operation::{RequestId, RequestIdExt}; use aws_sdk_s3::primitives::ByteStream; use aws_sdk_s3::Client; @@ -479,7 +479,13 @@ impl S3Client { .body(ByteStream::from(data)) .send() .await - .map_err(|e| format_sdk_s3_error("PutObject", &format!("bucket={} key={key}", self.bucket), &e))?; + .map_err(|e| { + format_sdk_s3_error( + "PutObject", + &format!("bucket={} key={key}", self.bucket), + &e, + ) + })?; Ok(()) } @@ -493,7 +499,11 @@ impl S3Client { .send() .await .map_err(|e| { - format_sdk_s3_error("DeleteObject", &format!("bucket={} key={key}", self.bucket), &e) + format_sdk_s3_error( + "DeleteObject", + &format!("bucket={} key={key}", self.bucket), + &e, + ) })?; Ok(()) @@ -621,16 +631,13 @@ impl S3Client { req = req.continuation_token(token); } - let resp = req - .send() - .await - .map_err(|e| { - format_sdk_s3_error( - "ListObjectsV2", - &format!("bucket={} prefix={prefix}", self.bucket), - &e, - ) - })?; + let resp = req.send().await.map_err(|e| { + format_sdk_s3_error( + "ListObjectsV2", + &format!("bucket={} prefix={prefix}", self.bucket), + &e, + ) + })?; // Process files (contents) for obj in resp.contents() { @@ -695,16 +702,13 @@ impl S3Client { req = req.continuation_token(token); } - let resp = req - .send() - .await - .map_err(|e| { - format_sdk_s3_error( - "ListObjectsV2", - &format!("bucket={} prefix={prefix}", self.bucket), - &e, - ) - })?; + let resp = req.send().await.map_err(|e| { + format_sdk_s3_error( + "ListObjectsV2", + &format!("bucket={} prefix={prefix}", self.bucket), + &e, + ) + })?; for obj in resp.contents() { let key = obj.key().unwrap_or(""); @@ -992,12 +996,8 @@ mod tests { #[test] fn test_format_generic_s3_error_includes_operation_bucket_key_and_raw_error() { - let err = format_generic_s3_error( 
- "PutObject", - "test-bucket", - "tenant/a.txt", - "service error", - ); + let err = + format_generic_s3_error("PutObject", "test-bucket", "tenant/a.txt", "service error"); match err { Error::Internal(message) => { From 7891a8cec465adae17ea6c2eb293c2e2805c08a1 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Mon, 15 Jun 2026 15:41:32 +0800 Subject: [PATCH 16/19] format ruff fix --- .../models/vlm/backends/volcengine_vlm.py | 2 +- openviking/service/fs_service.py | 4 +- .../service/resource_memory_link_service.py | 4 +- openviking/session/memory/memory_updater.py | 6 +- openviking/session/memory/utils/language.py | 75 +++++++++++++++---- tests/service/test_fs_service.py | 8 +- .../test_resource_memory_link_service.py | 6 +- .../memory/test_memory_react_system_prompt.py | 5 +- tests/session/memory/test_memory_updater.py | 4 +- 9 files changed, 80 insertions(+), 34 deletions(-) diff --git a/openviking/models/vlm/backends/volcengine_vlm.py b/openviking/models/vlm/backends/volcengine_vlm.py index 34eab984e3..f3f96b6fd4 100644 --- a/openviking/models/vlm/backends/volcengine_vlm.py +++ b/openviking/models/vlm/backends/volcengine_vlm.py @@ -385,4 +385,4 @@ async def get_vision_completion_async( result = self._build_vlm_response(response, has_tools=bool(tools)) if tools: return result - return self._clean_response(str(result)) \ No newline at end of file + return self._clean_response(str(result)) diff --git a/openviking/service/fs_service.py b/openviking/service/fs_service.py index 92db4d5a9f..e02ddfb722 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -357,7 +357,9 @@ async def _wait_for_refresh(self, *, timeout: Optional[float]) -> Dict[str, Any] raise DeadlineExceededError("queue processing", timeout) from exc return get_request_wait_tracker().build_queue_status(telemetry_id) try: - return build_queue_status_payload(await get_queue_manager().wait_complete(timeout=timeout)) + return build_queue_status_payload( + await 
get_queue_manager().wait_complete(timeout=timeout) + ) except TimeoutError as exc: raise DeadlineExceededError("queue processing", timeout) from exc diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py index 30dcea3277..8f7fb9cba9 100644 --- a/openviking/service/resource_memory_link_service.py +++ b/openviking/service/resource_memory_link_service.py @@ -458,7 +458,9 @@ async def _find_referencing_memories( for ref in self._coerce_resource_refs(mf.extra_fields.get("resource_refs")): if self._resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive): matches.append(_MemoryRefMatch(uri, mf, ref)) - if not any(match.memory_uri == uri for match in matches) and content_references_resource( + if not any( + match.memory_uri == uri for match in matches + ) and content_references_resource( mf.content, resource_uri, recursive=recursive, diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 881a0000a9..b3de674d09 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -834,7 +834,11 @@ async def _sync_resource_refs_for_result( viking_fs = self._get_viking_fs() deleted_uris = set(result.deleted_uris) for uri in dict.fromkeys(result.written_uris + result.edited_uris): - if uri in deleted_uris or uri.endswith("/.overview.md") or uri.endswith("/.abstract.md"): + if ( + uri in deleted_uris + or uri.endswith("/.overview.md") + or uri.endswith("/.abstract.md") + ): continue try: raw = await viking_fs.read_file(uri, ctx=ctx) diff --git a/openviking/session/memory/utils/language.py b/openviking/session/memory/utils/language.py index 16ad145134..3830b695de 100644 --- a/openviking/session/memory/utils/language.py +++ b/openviking/session/memory/utils/language.py @@ -81,20 +81,40 @@ # Use Timezone as a weak fallback signal. 
_TIMEZONE_LANGUAGE_GROUPS = { "zh-CN": ( - "asia/shanghai", "asia/chongqing", "asia/harbin", "asia/urumqi", - "asia/hong_kong", "asia/macau", "asia/taipei", "prc", "roc", "hongkong", - "china standard time", "taipei standard time", + "asia/shanghai", + "asia/chongqing", + "asia/harbin", + "asia/urumqi", + "asia/hong_kong", + "asia/macau", + "asia/taipei", + "prc", + "roc", + "hongkong", + "china standard time", + "taipei standard time", ), "ja": ("asia/tokyo", "japan", "tokyo standard time"), "ko": ("asia/seoul", "rok", "korea standard time"), "ru": ( - "europe/moscow", "europe/kaliningrad", "asia/yekaterinburg", "asia/vladivostok", + "europe/moscow", + "europe/kaliningrad", + "asia/yekaterinburg", + "asia/vladivostok", "russian standard time", ), "ar": ( - "asia/riyadh", "asia/dubai", "asia/qatar", "asia/kuwait", - "asia/baghdad", "africa/cairo", "africa/algiers", "africa/tunis", - "arab standard time", "arabian standard time", "egypt standard time", + "asia/riyadh", + "asia/dubai", + "asia/qatar", + "asia/kuwait", + "asia/baghdad", + "africa/cairo", + "africa/algiers", + "africa/tunis", + "arab standard time", + "arabian standard time", + "egypt standard time", ), "it": ("europe/rome",), "fr": ("europe/paris",), @@ -102,13 +122,34 @@ "de": ("europe/berlin",), "pt": ("europe/lisbon", "america/sao_paulo"), "en": ( - "america/new_york", "america/chicago", "america/denver", "america/los_angeles", - "america/phoenix", "america/anchorage", "pacific/honolulu", "us/eastern", - "us/central", "us/mountain", "us/pacific", "europe/london", "europe/dublin", - "gb", "gb-eire", "america/toronto", "america/vancouver", "canada/eastern", - "canada/pacific", "australia/sydney", "australia/melbourne", - "australia/brisbane", "australia/perth", "pacific/auckland", "nz", - "eastern standard time", "pacific standard time", "gmt standard time", + "america/new_york", + "america/chicago", + "america/denver", + "america/los_angeles", + "america/phoenix", + "america/anchorage", + 
"pacific/honolulu", + "us/eastern", + "us/central", + "us/mountain", + "us/pacific", + "europe/london", + "europe/dublin", + "gb", + "gb-eire", + "america/toronto", + "america/vancouver", + "canada/eastern", + "canada/pacific", + "australia/sydney", + "australia/melbourne", + "australia/brisbane", + "australia/perth", + "pacific/auckland", + "nz", + "eastern standard time", + "pacific standard time", + "gmt standard time", ), } @@ -128,7 +169,11 @@ def _language_allowed_by_fallback(language: str, fallback_language: str) -> bool def _is_strong_dominant(count: int, total: int) -> bool: - return count >= _STRONG_DOMINANT_MIN_CHARS and total > 0 and count / total >= _STRONG_DOMINANT_RATIO + return ( + count >= _STRONG_DOMINANT_MIN_CHARS + and total > 0 + and count / total >= _STRONG_DOMINANT_RATIO + ) def _language_from_locale_value(value: str) -> str: diff --git a/tests/service/test_fs_service.py b/tests/service/test_fs_service.py index 5e9db932cb..acdb136b9d 100644 --- a/tests/service/test_fs_service.py +++ b/tests/service/test_fs_service.py @@ -233,9 +233,7 @@ async def test_resource_rm_refreshes_memory_overview_for_cleaned_memories( ): cleanup = { "status": "success", - "memory_uris": [ - "viking://user/ryoma/memories/entities/动漫角色/不二周助-write-test.md" - ], + "memory_uris": ["viking://user/ryoma/memories/entities/动漫角色/不二周助-write-test.md"], "deleted_memory_uris": [ "viking://user/ryoma/memories/entities/动漫角色/不二周助-link-test2.md" ], @@ -261,9 +259,7 @@ async def fake_refresh_schema_overview(*, viking_fs, directory_uri, ctx): uri = "viking://resources/images/2026/06/11/不二周助_jpeg" result = await service.rm(uri, ctx=request_context, recursive=True) - assert link_service.calls == [ - {"ctx": request_context, "resource_uri": uri, "recursive": True} - ] + assert link_service.calls == [{"ctx": request_context, "resource_uri": uri, "recursive": True}] assert refreshed == [ { "viking_fs": viking_fs, diff --git a/tests/service/test_resource_memory_link_service.py 
b/tests/service/test_resource_memory_link_service.py index 277b0ba716..ad380358d4 100644 --- a/tests/service/test_resource_memory_link_service.py +++ b/tests/service/test_resource_memory_link_service.py @@ -342,8 +342,7 @@ async def test_before_resource_delete_cleans_visible_uri_without_resource_refs( MemoryFile( uri=memory_uri, content=( - "今天是清明节。\n" - f"用户昨晚查看了[越前龙马照片]({resource_uri}),之后可参考该资源。" + f"今天是清明节。\n用户昨晚查看了[越前龙马照片]({resource_uri}),之后可参考该资源。" ), extra_fields={"memory_type": "events"}, ) @@ -379,8 +378,7 @@ async def test_before_resource_delete_exact_keeps_child_resource_refs( MemoryFile( uri=memory_uri, content=( - f"用户保存了[相册资源]({resource_uri})。\n" - f"用户保存了[相册里的子图]({child_uri})。" + f"用户保存了[相册资源]({resource_uri})。\n用户保存了[相册里的子图]({child_uri})。" ), extra_fields={ "resource_refs": [ diff --git a/tests/session/memory/test_memory_react_system_prompt.py b/tests/session/memory/test_memory_react_system_prompt.py index 42b71e5142..a0fcdb24cd 100644 --- a/tests/session/memory/test_memory_react_system_prompt.py +++ b/tests/session/memory/test_memory_react_system_prompt.py @@ -203,10 +203,7 @@ def test_detect_language_ignores_resource_uri_latin_segments(self): id="m1", role="user", parts=[ - TextPart( - "这是越前龙马的照片 " - "viking://resources/images/2026/06/12/yueqian_jpeg" - ) + TextPart("这是越前龙马的照片 viking://resources/images/2026/06/12/yueqian_jpeg") ], ) ] diff --git a/tests/session/memory/test_memory_updater.py b/tests/session/memory/test_memory_updater.py index 6af0278bf8..1a548c2c79 100644 --- a/tests/session/memory/test_memory_updater.py +++ b/tests/session/memory/test_memory_updater.py @@ -131,7 +131,9 @@ def test_extract_context_resource_event_content_hides_add_resource_fields(self): f"2026-06-12,用户保存了粉丝创作的越前龙马动漫插画资源,资源URI为{resource_uri}。", ) - assert content == f"2026-06-12,[用户保存了粉丝创作的越前龙马动漫插画资源]({resource_uri})。" + assert ( + content == f"2026-06-12,[用户保存了粉丝创作的越前龙马动漫插画资源]({resource_uri})。" + ) assert "Resource URI" not in content assert "Added at" not in content 
assert "Resource abstract" not in content From d31147a0b3f618f2eeae0121f6eee7647a7e6f34 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Tue, 16 Jun 2026 21:18:07 +0800 Subject: [PATCH 17/19] =?UTF-8?q?--reason=20=E4=BD=BF=E7=94=A8=E5=90=8C?= =?UTF-8?q?=E4=B8=80=E4=B8=AAsession=5Fid=EF=BC=8Cruff=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/en/api/02-resources.md | 2 +- docs/zh/api/02-resources.md | 2 +- .../service/resource_memory_link_service.py | 115 +++++------------- .../test_resource_memory_link_service.py | 80 ++++++++++-- .../service/test_core_encryption_startup.py | 35 ------ .../session/memory/test_embedding_template.py | 5 +- 6 files changed, 107 insertions(+), 132 deletions(-) diff --git a/docs/en/api/02-resources.md b/docs/en/api/02-resources.md index 7fd4c3dae9..04468acba8 100644 --- a/docs/en/api/02-resources.md +++ b/docs/en/api/02-resources.md @@ -126,7 +126,7 @@ This endpoint is the core entry point for resource management, supporting adding 3. Call the corresponding Parser to parse content 4. Build the directory tree and write to AGFS 5. Wait for semantic processing completion when `wait=true`; with `wait=false`, return a `task_id` for queue tracking -6. If `reason` is non-empty, commit a short temporary session through the normal memory extraction pipeline so suitable user memories can reference the resource URI +6. If `reason` is non-empty, append it to the fixed resource reason session and commit through the normal memory extraction pipeline so suitable user memories can reference the resource URI 7. Set up scheduled update task if `watch_interval` is specified **Code Entry Points**: diff --git a/docs/zh/api/02-resources.md b/docs/zh/api/02-resources.md index 43f9e66826..75e31110c8 100644 --- a/docs/zh/api/02-resources.md +++ b/docs/zh/api/02-resources.md @@ -121,7 +121,7 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector 3. 调用对应 Parser 解析内容 4. 构建目录树并写入 AGFS 5. 
`wait=true` 时等待语义处理完成;`wait=false` 时返回 `task_id` 用于队列跟踪 -6. 如果 `reason` 非空,通过一次短临时 session 复用常规记忆抽取链路,让合适的用户记忆引用该资源 URI +6. 如果 `reason` 非空,将其追加到固定的资源 reason session 并 commit,复用常规记忆抽取链路,让合适的用户记忆引用该资源 URI 7. 如指定 `--watch-interval`,设置定时更新任务 **代码入口**: diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py index 8f7fb9cba9..07fceee724 100644 --- a/openviking/service/resource_memory_link_service.py +++ b/openviking/service/resource_memory_link_service.py @@ -12,7 +12,6 @@ from dataclasses import dataclass from datetime import datetime, timezone from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence -from uuid import uuid4 from openviking.core.namespace import canonical_user_root, context_type_for_uri from openviking.message.part import TextPart @@ -39,6 +38,7 @@ logger = get_logger(__name__) +_RESOURCE_REASON_SESSION_ID = "__openviking_resource_reason__" _RESOURCE_REASON_MEMORY_TYPES = ["entities", "events", "preferences"] _RESOURCE_REASON_COMMIT_TIMEOUT_SECONDS = 1800.0 _RESOURCE_ABSTRACT_MAX_CHARS = 200 @@ -48,6 +48,14 @@ ) +def _resource_reason_memory_policy() -> Dict[str, Any]: + return { + "self": {"enabled": True}, + "peer": {"enabled": False}, + "memory_types": list(_RESOURCE_REASON_MEMORY_TYPES), + } + + @dataclass class _MemoryRefMatch: memory_uri: str @@ -68,7 +76,7 @@ def __init__( self._vikingdb = vikingdb self._viking_fs = viking_fs self._session_service = session_service - self._background_tasks: set[asyncio.Task] = set() + self._reason_session_lock = asyncio.Lock() def set_dependencies( self, @@ -105,20 +113,17 @@ async def on_resource_added( added_at = datetime.now(timezone.utc).isoformat() resource_abstract = await self._read_resource_directory_abstract(resource_uri, ctx) - session_id = f"resource_reason_{uuid4().hex}" + session_id = _RESOURCE_REASON_SESSION_ID commit_result: Dict[str, Any] = {} task_result: Optional[Dict[str, Any]] = None - delete_session_now = True - try: - 
session = await self._session_service.create( + + async with self._reason_session_lock: + session = await self._session_service.get( + session_id, ctx, - session_id=session_id, - memory_policy={ - "self": {"enabled": True}, - "peer": {"enabled": False}, - "memory_types": _RESOURCE_REASON_MEMORY_TYPES, - }, + auto_create=True, ) + session.meta.memory_policy = _resource_reason_memory_policy() session.add_messages( [ { @@ -143,32 +148,21 @@ async def on_resource_added( ctx, keep_recent_count=0, ) - task_id = commit_result.get("task_id") - if task_id: - try: - task_result = await self._wait_for_commit_task( - task_id=str(task_id), - ctx=ctx, - timeout=timeout, - ) - except asyncio.TimeoutError: - delete_session_now = False - self._schedule_session_delete_after_task( - session_id=session_id, - task_id=str(task_id), - ctx=ctx, - ) - raise - return { - "status": "success", - "session_id": session_id, - "commit_task_id": task_id, - "archive_uri": commit_result.get("archive_uri"), - "commit_task": task_result, - } - finally: - if delete_session_now: - await self._delete_temporary_session(session_id, ctx) + + task_id = commit_result.get("task_id") + if task_id: + task_result = await self._wait_for_commit_task( + task_id=str(task_id), + ctx=ctx, + timeout=timeout, + ) + return { + "status": "success", + "session_id": session_id, + "commit_task_id": task_id, + "archive_uri": commit_result.get("archive_uri"), + "commit_task": task_result, + } @staticmethod def _build_resource_addition_message( @@ -188,51 +182,6 @@ def _build_resource_addition_message( f"User reason: {reason}" ) - def _schedule_session_delete_after_task( - self, - *, - session_id: str, - task_id: str, - ctx: RequestContext, - ) -> None: - task = asyncio.create_task( - self._delete_session_after_task(session_id=session_id, task_id=task_id, ctx=ctx) - ) - self._background_tasks.add(task) - task.add_done_callback(self._background_tasks.discard) - - async def _delete_session_after_task( - self, - *, - session_id: 
str, - task_id: str, - ctx: RequestContext, - ) -> None: - try: - await self._wait_for_commit_task( - task_id=task_id, - ctx=ctx, - timeout=_RESOURCE_REASON_COMMIT_TIMEOUT_SECONDS, - ) - except Exception as exc: - logger.warning( - "Skipped temporary resource reason session cleanup after task %s: %s", - task_id, - exc, - ) - return - await self._delete_temporary_session(session_id, ctx) - - async def _delete_temporary_session(self, session_id: str, ctx: RequestContext) -> None: - if not self._session_service: - return - try: - await self._session_service.delete(session_id, ctx) - except NotFoundError: - pass - except Exception as exc: - logger.warning("Failed to delete temporary resource reason session: %s", exc) - async def _wait_for_commit_task( self, *, diff --git a/tests/service/test_resource_memory_link_service.py b/tests/service/test_resource_memory_link_service.py index ad380358d4..a4add5ea17 100644 --- a/tests/service/test_resource_memory_link_service.py +++ b/tests/service/test_resource_memory_link_service.py @@ -2,12 +2,16 @@ # SPDX-License-Identifier: AGPL-3.0 """Tests for resource-memory linking service.""" +from types import SimpleNamespace from unittest.mock import AsyncMock import pytest from openviking.server.identity import RequestContext, Role -from openviking.service.resource_memory_link_service import ResourceMemoryLinkService +from openviking.service.resource_memory_link_service import ( + _RESOURCE_REASON_SESSION_ID, + ResourceMemoryLinkService, +) from openviking.session.memory.dataclass import MemoryFile from openviking.session.memory.memory_updater import MemoryUpdateResult from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils @@ -54,6 +58,7 @@ async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): class _FakeSession: def __init__(self): self.messages = [] + self.meta = SimpleNamespace(memory_policy=None) def add_messages(self, specs): self.messages.extend(specs) @@ -63,6 +68,7 @@ class 
_FakeSessionService: def __init__(self): self.session = _FakeSession() self.created = [] + self.got = [] self.committed = [] self.deleted = [] @@ -76,7 +82,18 @@ async def create(self, ctx, session_id=None, memory_policy=None): ) return self.session + async def get(self, session_id, ctx, auto_create=False): + self.got.append( + { + "ctx": ctx, + "session_id": session_id, + "auto_create": auto_create, + } + ) + return self.session + async def commit_async(self, session_id, ctx, keep_recent_count=0): + archive_index = len(self.committed) + 1 self.committed.append( { "ctx": ctx, @@ -86,7 +103,9 @@ async def commit_async(self, session_id, ctx, keep_recent_count=0): ) return { "task_id": None, - "archive_uri": f"viking://user/alice/sessions/{session_id}/history/archive_001", + "archive_uri": ( + f"viking://user/alice/sessions/{session_id}/history/archive_{archive_index:03d}" + ), } async def delete(self, session_id, ctx): @@ -102,7 +121,7 @@ def request_context(): @pytest.mark.asyncio -async def test_on_resource_added_bridges_reason_through_temporary_session(request_context): +async def test_on_resource_added_bridges_reason_through_fixed_session(request_context): resource_uri = "viking://resources/images/2026/06/11/yueqian_jpeg" session_service = _FakeSessionService() service = ResourceMemoryLinkService( @@ -121,18 +140,20 @@ async def test_on_resource_added_bridges_reason_through_temporary_session(reques session_id = result["session_id"] assert result["status"] == "success" - assert session_id.startswith("resource_reason_") - assert session_service.created == [ + assert session_id == _RESOURCE_REASON_SESSION_ID + assert session_service.got == [ { "ctx": request_context, "session_id": session_id, - "memory_policy": { - "self": {"enabled": True}, - "peer": {"enabled": False}, - "memory_types": ["entities", "events", "preferences"], - }, + "auto_create": True, } ] + assert session_service.created == [] + assert session_service.session.meta.memory_policy == { + "self": 
{"enabled": True}, + "peer": {"enabled": False}, + "memory_types": ["entities", "events", "preferences"], + } assert session_service.committed == [ { "ctx": request_context, @@ -140,7 +161,7 @@ async def test_on_resource_added_bridges_reason_through_temporary_session(reques "keep_recent_count": 0, } ] - assert session_service.deleted == [{"ctx": request_context, "session_id": session_id}] + assert session_service.deleted == [] message_text = session_service.session.messages[0]["parts"][0].text assert resource_uri in message_text assert "这是越前龙马的照片" in message_text @@ -148,6 +169,43 @@ async def test_on_resource_added_bridges_reason_through_temporary_session(reques assert "动漫角色照片合集" in message_text +@pytest.mark.asyncio +async def test_on_resource_added_reuses_same_reason_session(request_context): + session_service = _FakeSessionService() + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS({}), + session_service=session_service, + ) + + first = await service.on_resource_added( + ctx=request_context, + resource_uri="viking://resources/images/ryoma.jpeg", + reason="这是越前龙马的照片", + source_name="ryoma.jpeg", + ) + second = await service.on_resource_added( + ctx=request_context, + resource_uri="viking://resources/images/fuji.jpeg", + reason="这是不二周助的照片", + source_name="fuji.jpeg", + ) + + assert first["session_id"] == _RESOURCE_REASON_SESSION_ID + assert second["session_id"] == _RESOURCE_REASON_SESSION_ID + assert [call["session_id"] for call in session_service.got] == [ + _RESOURCE_REASON_SESSION_ID, + _RESOURCE_REASON_SESSION_ID, + ] + assert [call["session_id"] for call in session_service.committed] == [ + _RESOURCE_REASON_SESSION_ID, + _RESOURCE_REASON_SESSION_ID, + ] + assert session_service.deleted == [] + messages = [item["parts"][0].text for item in session_service.session.messages] + assert "这是越前龙马的照片" in messages[0] + assert "这是不二周助的照片" in messages[1] + + @pytest.mark.asyncio async def 
test_read_resource_directory_abstract_uses_parent_abstract(request_context): service = ResourceMemoryLinkService( diff --git a/tests/unit/service/test_core_encryption_startup.py b/tests/unit/service/test_core_encryption_startup.py index e05a753c2f..dfe1d7c4db 100644 --- a/tests/unit/service/test_core_encryption_startup.py +++ b/tests/unit/service/test_core_encryption_startup.py @@ -8,7 +8,6 @@ import pytest -from openviking.pyagfs.exceptions import AGFSNotFoundError from openviking.service.core import OpenVikingService from openviking.utils.agfs_utils import RagfsBindingConfig @@ -67,40 +66,6 @@ async def _bootstrap(config: dict) -> _FakeEncryptor: assert isinstance(service._encryptor, _FakeEncryptor) -@pytest.mark.parametrize( - ("encrypted_mode", "raw", "message"), - [ - (True, b"{}", "plaintext"), - (False, b"OVE1ciphertext", "encrypted"), - ], -) -def test_probe_storage_shape_rejects_mode_mismatch(encrypted_mode, raw, message): - """Reject existing system metadata whose shape differs from current encryption mode.""" - - class _Client: - def read_raw(self, path: str) -> bytes: - assert path == "/local/_system/accounts.json" - return raw - - service = OpenVikingService.__new__(OpenVikingService) - - with pytest.raises(RuntimeError, match=message): - service._probe_storage_shape(_Client(), encrypted_mode) - - -def test_probe_storage_shape_allows_empty_system(): - """Treat missing system metadata as a fresh system.""" - - class _Client: - def read_raw(self, path: str) -> bytes: - assert path == "/local/_system/accounts.json" - raise AGFSNotFoundError("not found") - - service = OpenVikingService.__new__(OpenVikingService) - - service._probe_storage_shape(_Client(), encrypted_mode=True) - - def test_ensure_data_dir_lock_acquired_once(monkeypatch, tmp_path): """Acquire the data-dir lock once before startup encryption bootstrap.""" diff --git a/tests/unit/session/memory/test_embedding_template.py b/tests/unit/session/memory/test_embedding_template.py index 
8f39655925..cbf8de5891 100644 --- a/tests/unit/session/memory/test_embedding_template.py +++ b/tests/unit/session/memory/test_embedding_template.py @@ -33,7 +33,10 @@ def setup(self): def test_events_exposes_embedding_template(self): schema = self.registry.get("events") - assert schema.embedding_template == "{{ event_name }}\n\n{{ goal }}\n\n{{ content }}" + assert ( + schema.embedding_template + == "EventName: {{ event_name }}\nGoal: {{ goal }}\n{{ content }}" + ) def test_preferences_exposes_embedding_template(self): schema = self.registry.get("preferences") From fde9e23c730a7370e2122447d20656f83aecda38 Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Tue, 16 Jun 2026 22:13:15 +0800 Subject: [PATCH 18/19] =?UTF-8?q?--reason=20=E4=BD=BF=E7=94=A8=E5=90=8C?= =?UTF-8?q?=E4=B8=80=E4=B8=AAsession=5Fid=EF=BC=8Cruff=E4=BF=AE=E6=AD=A3?= =?UTF-8?q?=EF=BC=8Cpeer=20memory?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/en/api/02-resources.md | 4 +- docs/zh/api/02-resources.md | 4 +- .../service/resource_memory_link_service.py | 106 ++++++++++++------ .../test_resource_memory_link_service.py | 95 ++++++++++++++++ 4 files changed, 170 insertions(+), 39 deletions(-) diff --git a/docs/en/api/02-resources.md b/docs/en/api/02-resources.md index 04468acba8..e890bc389f 100644 --- a/docs/en/api/02-resources.md +++ b/docs/en/api/02-resources.md @@ -169,8 +169,8 @@ This endpoint is the core entry point for resource management, supporting adding - Raw HTTP calls for local files require first uploading via [temp_upload](#temp_upload) to obtain `temp_file_id` - When `to` is specified and the target already exists, triggers incremental update - Only Git repository sources use full background import when `wait=false`; OpenViking performs repository preflight and target planning before returning the `task_id`. -- Memory generated from `reason` is extracted through the same pipeline as `session.commit`. 
It uses `reason`, the `viking://resources/...` URI, available source name, and available directory abstract; it does not inspect or expand the full resource content. OpenViking writes to existing memory types such as `entities`, `events`, or `preferences`, not a dedicated resource memory directory. -- When deleting `viking://resources/...`, OpenViking scans the current user's memories before deletion, removes the matching resource URI and content introduced by that `reason`, and refreshes the semantic index for the affected memories. +- Memory generated from `reason` is extracted through the same pipeline as `session.commit`. It uses `reason`, the resource URI, available source name, and available directory abstract; it does not inspect or expand the full resource content. OpenViking writes to existing memory types such as `entities`, `events`, or `preferences`, not a dedicated resource memory directory. +- When deleting a resource, OpenViking scans the self or peer memories targeted by the current context before deletion, removes the matching resource URI and content introduced by that `reason`, and refreshes the semantic index for the affected memories. - Other sources with `wait=false` finish source parsing, target resolution, and AGFS writes before returning. Only semantic and embedding queues continue asynchronously. - When `watch_interval > 0`, the watch task binds to `to` if provided; otherwise it binds to the `root_uri` returned by this import. If no stable `root_uri` is available, the request fails and asks for an explicit `to`. - Feishu/Lark app-token imports do not pass `args.feishu_access_token`. OpenViking keeps the existing app credential flow and the SDK obtains an app/tenant token from `app_id` and `app_secret`. This mode supports both one-time imports and `watch_interval > 0`. 
diff --git a/docs/zh/api/02-resources.md b/docs/zh/api/02-resources.md index 75e31110c8..799ba46e6e 100644 --- a/docs/zh/api/02-resources.md +++ b/docs/zh/api/02-resources.md @@ -162,8 +162,8 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector - `user_id` 和 `peer_id` 路径片段必须是安全的单段标识,例如 `alice` 或 `web-visitor-alice`。包含路径分隔符、`.`、`..`、`:` 或 `+` 的值会被拒绝。 - `path` 和 `temp_file_id` 不能同时指定,上传本地文件需要先通过 [temp_upload](#temp_upload) 上传获取 `temp_file_id`,在 SDK 和 CLI 中已经封装好。 - 只有 Git 仓库来源在 `wait=false` 时使用完整后台导入;OpenViking 会先完成仓库 preflight 和目标规划,再返回 `task_id`。 -- `reason` 触发的记忆生成复用 `session.commit` 的抽取链路,只使用 `reason`、`viking://resources/...` URI、可用的资源名称和目录摘要,不会读取或展开完整资源正文;系统会写入 `entities`、`events`、`preferences` 等已有记忆类型,不创建独立的资源记忆目录。 -- 删除 `viking://resources/...` 时,系统会在删除前扫描当前用户记忆中的 `resource_refs`,清理对应资源 URI 和由该 `reason` 引入的内容,并重新刷新相关记忆的语义索引。 +- `reason` 触发的记忆生成复用 `session.commit` 的抽取链路,只使用 `reason`、资源 URI、可用的资源名称和目录摘要,不会读取或展开完整资源正文;系统会写入 `entities`、`events`、`preferences` 等已有记忆类型,不创建独立的资源记忆目录。 +- 删除资源时,系统会在删除前扫描本次上下文对应的 self 或 peer 记忆中的 `resource_refs`,清理对应资源 URI 和由该 `reason` 引入的内容,并重新刷新相关记忆的语义索引。 - 其他来源在 `wait=false` 时会在响应前完成来源解析、目标解析和 AGFS 写入,仅 semantic 与 embedding 队列继续异步处理。 - `watch_interval > 0` 时,如果指定了 `to`,监控任务绑定该目标;如果未指定 `to`,监控任务绑定本次导入返回的 `root_uri`。如果无法得到稳定 `root_uri`,请求会报错并要求显式传 `to`。 - 飞书/Lark 应用 token 导入不传 `args.feishu_access_token`。OpenViking 保持原有应用凭证流程,由 SDK 使用 `app_id` 和 `app_secret` 自动获取 app/tenant token。该模式支持一次性导入和 `watch_interval > 0`。 diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py index 07fceee724..07a311ff51 100644 --- a/openviking/service/resource_memory_link_service.py +++ b/openviking/service/resource_memory_link_service.py @@ -13,7 +13,14 @@ from datetime import datetime, timezone from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence -from openviking.core.namespace import canonical_user_root, context_type_for_uri +from openviking.core.namespace import ( + 
NamespaceShapeError, + canonical_user_root, + canonicalize_uri, + context_type_for_uri, + uri_parts, +) +from openviking.core.peer_id import normalize_peer_id from openviking.message.part import TextPart from openviking.server.identity import RequestContext from openviking.session.memory.dataclass import MemoryFile @@ -48,14 +55,40 @@ ) -def _resource_reason_memory_policy() -> Dict[str, Any]: +def _resource_reason_memory_policy(target_peer_id: Optional[str] = None) -> Dict[str, Any]: + peer_targeted = bool(target_peer_id) return { - "self": {"enabled": True}, - "peer": {"enabled": False}, + "self": {"enabled": not peer_targeted}, + "peer": {"enabled": peer_targeted}, "memory_types": list(_RESOURCE_REASON_MEMORY_TYPES), } +def _resource_reason_peer_id(ctx: RequestContext, resource_uri: str) -> Optional[str]: + actor_peer_id = normalize_peer_id(ctx.actor_peer_id) + if actor_peer_id: + return actor_peer_id + return _peer_id_from_resource_uri(resource_uri, ctx) + + +def _peer_id_from_resource_uri(resource_uri: str, ctx: RequestContext) -> Optional[str]: + try: + parts = uri_parts(canonicalize_uri(resource_uri, ctx)) + except (NamespaceShapeError, ValueError): + return None + if len(parts) >= 5 and parts[0] == "user" and parts[2] == "peers": + return normalize_peer_id(parts[3]) + return None + + +def _memory_roots_for_resource_refs(ctx: RequestContext, resource_uri: str) -> List[str]: + user_root = canonical_user_root(ctx) + target_peer_id = _resource_reason_peer_id(ctx, resource_uri) + if target_peer_id: + return [f"{user_root}/peers/{target_peer_id}/memories"] + return [f"{user_root}/memories"] + + @dataclass class _MemoryRefMatch: memory_uri: str @@ -114,6 +147,7 @@ async def on_resource_added( added_at = datetime.now(timezone.utc).isoformat() resource_abstract = await self._read_resource_directory_abstract(resource_uri, ctx) session_id = _RESOURCE_REASON_SESSION_ID + target_peer_id = _resource_reason_peer_id(ctx, resource_uri) commit_result: Dict[str, Any] = {} 
task_result: Optional[Dict[str, Any]] = None @@ -123,26 +157,25 @@ async def on_resource_added( ctx, auto_create=True, ) - session.meta.memory_policy = _resource_reason_memory_policy() - session.add_messages( - [ - { - "role": "user", - "parts": [ - TextPart( - text=self._build_resource_addition_message( - resource_uri=resource_uri, - reason=reason, - source_name=source_name, - added_at=added_at, - resource_abstract=resource_abstract, - ) - ) - ], - "created_at": added_at, - } - ] - ) + session.meta.memory_policy = _resource_reason_memory_policy(target_peer_id) + message_spec: Dict[str, Any] = { + "role": "user", + "parts": [ + TextPart( + text=self._build_resource_addition_message( + resource_uri=resource_uri, + reason=reason, + source_name=source_name, + added_at=added_at, + resource_abstract=resource_abstract, + ) + ) + ], + "created_at": added_at, + } + if target_peer_id: + message_spec["peer_id"] = target_peer_id + session.add_messages([message_spec]) commit_result = await self._session_service.commit_async( session_id, ctx, @@ -380,18 +413,21 @@ async def _find_referencing_memories( recursive: bool, ) -> List[_MemoryRefMatch]: viking_fs = self._get_viking_fs() - memory_root = f"{canonical_user_root(ctx)}/memories" - try: - entries = await viking_fs.tree( - memory_root, - ctx=ctx, - node_limit=1000000, - level_limit=None, - ) - except Exception: - return [] - matches: List[_MemoryRefMatch] = [] + entries: List[Dict[str, Any]] = [] + for memory_root in _memory_roots_for_resource_refs(ctx, resource_uri): + try: + entries.extend( + await viking_fs.tree( + memory_root, + ctx=ctx, + node_limit=1000000, + level_limit=None, + ) + ) + except Exception: + continue + for entry in entries: uri = entry.get("uri", "") rel_path = entry.get("rel_path", "") diff --git a/tests/service/test_resource_memory_link_service.py b/tests/service/test_resource_memory_link_service.py index a4add5ea17..45b12e32da 100644 --- a/tests/service/test_resource_memory_link_service.py +++ 
b/tests/service/test_resource_memory_link_service.py @@ -206,6 +206,66 @@ async def test_on_resource_added_reuses_same_reason_session(request_context): assert "这是不二周助的照片" in messages[1] +@pytest.mark.asyncio +async def test_on_resource_added_routes_reason_to_actor_peer(request_context): + peer_ctx = RequestContext( + user=request_context.user, + role=request_context.role, + actor_peer_id="web-visitor-alice", + ) + session_service = _FakeSessionService() + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS({}), + session_service=session_service, + ) + + result = await service.on_resource_added( + ctx=peer_ctx, + resource_uri="viking://resources/images/ryoma.jpeg", + reason="这是越前龙马的照片", + source_name="ryoma.jpeg", + ) + + assert result["session_id"] == _RESOURCE_REASON_SESSION_ID + assert session_service.session.meta.memory_policy == { + "self": {"enabled": False}, + "peer": {"enabled": True}, + "memory_types": ["entities", "events", "preferences"], + } + assert session_service.session.messages[0]["peer_id"] == "web-visitor-alice" + assert session_service.committed == [ + { + "ctx": peer_ctx, + "session_id": _RESOURCE_REASON_SESSION_ID, + "keep_recent_count": 0, + } + ] + + +@pytest.mark.asyncio +async def test_on_resource_added_routes_peer_resource_uri_to_peer(request_context): + resource_uri = "viking://user/alice/peers/web-visitor-alice/resources/images/ryoma.jpeg" + session_service = _FakeSessionService() + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS({}), + session_service=session_service, + ) + + await service.on_resource_added( + ctx=request_context, + resource_uri=resource_uri, + reason="这是越前龙马的照片", + source_name="ryoma.jpeg", + ) + + assert session_service.session.meta.memory_policy == { + "self": {"enabled": False}, + "peer": {"enabled": True}, + "memory_types": ["entities", "events", "preferences"], + } + assert session_service.session.messages[0]["peer_id"] == "web-visitor-alice" + + @pytest.mark.asyncio async def 
test_read_resource_directory_abstract_uses_parent_abstract(request_context): service = ResourceMemoryLinkService( @@ -281,6 +341,41 @@ async def test_find_referencing_memories_uses_memory_refs(request_context): assert matches[0].resource_ref["resource_uri"] == resource_uri +@pytest.mark.asyncio +async def test_find_referencing_memories_scans_actor_peer_memory(request_context): + peer_ctx = RequestContext( + user=request_context.user, + role=request_context.role, + actor_peer_id="web-visitor-alice", + ) + memory_uri = "viking://user/alice/peers/web-visitor-alice/memories/entities/wang.md" + resource_uri = "viking://resources/docs/id_card.pdf" + raw = ( + "王大锤资料。\n\n" + "" + ) + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS({memory_uri: raw})) + + matches = await service._find_referencing_memories( + ctx=peer_ctx, + resource_uri=resource_uri, + recursive=True, + ) + + assert len(matches) == 1 + assert matches[0].memory_uri == memory_uri + assert matches[0].resource_ref["resource_uri"] == resource_uri + + @pytest.mark.asyncio async def test_before_resource_delete_removes_refs_when_cleanup_has_no_changes(request_context): memory_uri = "viking://user/alice/memories/entities/wang.md" From d740397e9b2b1641f293b269a54e91ac946a448c Mon Sep 17 00:00:00 2001 From: "fujiajie.168" Date: Tue, 16 Jun 2026 22:17:28 +0800 Subject: [PATCH 19/19] =?UTF-8?q?ruff=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- openviking/session/memory/session_extract_context_provider.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/openviking/session/memory/session_extract_context_provider.py b/openviking/session/memory/session_extract_context_provider.py index 3bff6ab4a5..5dbdcb18d0 100644 --- a/openviking/session/memory/session_extract_context_provider.py +++ b/openviking/session/memory/session_extract_context_provider.py @@ -91,8 +91,6 @@ def read_file_contents(self) -> Dict[str, MemoryFile]: def 
get_conversation_text(self) -> str: """Get the full conversation text for match_text validation.""" - from openviking.message.part import TextPart - text_parts = [] for message in self.messages or []: for part in getattr(message, "parts", []): @@ -139,7 +137,6 @@ def _get_vision_vlm(self): def _detect_language(self) -> str: """检测输出语言""" - from openviking.message.part import TextPart from openviking.session.memory.utils import ( resolve_output_language, strip_language_detection_noise,