diff --git a/crates/ov_cli/src/base_client.rs b/crates/ov_cli/src/base_client.rs index 1cf5e58ef5..c21f69dedc 100644 --- a/crates/ov_cli/src/base_client.rs +++ b/crates/ov_cli/src/base_client.rs @@ -934,4 +934,4 @@ impl<'a> FileUploader<'a> { .map(|s| s.to_string()) .ok_or_else(|| Error::Parse("Missing temp_file_id in response".to_string())) } -} +} \ No newline at end of file diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index 637c586f25..3c981bfdb8 100644 --- a/crates/ov_cli/src/client.rs +++ b/crates/ov_cli/src/client.rs @@ -366,11 +366,21 @@ impl HttpClient { self.post("/api/v1/fs/mkdir", &body).await } - pub async fn rm(&self, uri: &str, recursive: bool) -> Result { - let params = vec![ + pub async fn rm( + &self, + uri: &str, + recursive: bool, + wait: bool, + timeout: Option, + ) -> Result { + let mut params = vec![ ("uri".to_string(), uri.to_string()), ("recursive".to_string(), recursive.to_string()), + ("wait".to_string(), wait.to_string()), ]; + if let Some(timeout) = timeout { + params.push(("timeout".to_string(), timeout.to_string())); + } self.delete("/api/v1/fs", &params).await } diff --git a/crates/ov_cli/src/commands/filesystem.rs b/crates/ov_cli/src/commands/filesystem.rs index bf557ad3fa..e75de3cfc6 100644 --- a/crates/ov_cli/src/commands/filesystem.rs +++ b/crates/ov_cli/src/commands/filesystem.rs @@ -385,10 +385,12 @@ pub async fn rm( client: &HttpClient, uri: &str, recursive: bool, + wait: bool, + timeout: Option, output_format: OutputFormat, compact: bool, ) -> Result<()> { - let result = client.rm(uri, recursive).await?; + let result = client.rm(uri, recursive, wait, timeout).await?; let message = if let Some(count) = result .get("estimated_deleted_count") diff --git a/crates/ov_cli/src/handlers.rs b/crates/ov_cli/src/handlers.rs index 342cf81048..ccae601771 100644 --- a/crates/ov_cli/src/handlers.rs +++ b/crates/ov_cli/src/handlers.rs @@ -1427,9 +1427,24 @@ pub async fn handle_mkdir(uri: String, description: Option, ctx: 
CliCont .await } -pub async fn handle_rm(uri: String, recursive: bool, ctx: CliContext) -> Result<()> { +pub async fn handle_rm( + uri: String, + recursive: bool, + wait: bool, + timeout: Option, + ctx: CliContext, +) -> Result<()> { let client = ctx.get_client(); - commands::filesystem::rm(&client, &uri, recursive, ctx.output_format, ctx.compact).await + commands::filesystem::rm( + &client, + &uri, + recursive, + wait, + timeout, + ctx.output_format, + ctx.compact, + ) + .await } pub async fn handle_mv(from_uri: String, to_uri: String, ctx: CliContext) -> Result<()> { diff --git a/crates/ov_cli/src/help_ui.rs b/crates/ov_cli/src/help_ui.rs index 7600cfea37..96a5fc9553 100644 --- a/crates/ov_cli/src/help_ui.rs +++ b/crates/ov_cli/src/help_ui.rs @@ -265,6 +265,10 @@ const COMMAND_HELP_SPECS: &[CommandHelpSpec] = &[ label: "ov rm viking://scratch --recursive", description: "Remove a directory subtree.", }, + HelpItem { + label: "ov rm viking://resources/images/foo --recursive --wait", + description: "Remove a subtree and wait for generated overviews to refresh.", + }, ], next_steps: &[ HelpItem { @@ -2675,6 +2679,7 @@ mod tests { for args in [ ["ov", "add-resource", "--help"], ["ov", "add-skill", "--help"], + ["ov", "rm", "--help"], ["ov", "write", "--help"], ] { let rendered = strip_ansi( diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index 2cf4550011..38fb5386f6 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -438,6 +438,12 @@ enum Commands { /// Remove recursively #[arg(short, long, help_heading = "Common options")] recursive: bool, + /// Wait until semantic refresh is complete + #[arg(long, help_heading = "Common options")] + wait: bool, + /// Wait timeout in seconds (only used with --wait) + #[arg(long, value_name = "seconds", help_heading = "Common options")] + timeout: Option, }, /// [Data] Move or rename resource #[command(alias = "rename")] @@ -2778,7 +2784,12 @@ async fn main() { level_limit, } => 
handlers::handle_tree(uri, abs_limit, all, node_limit, level_limit, ctx).await, Commands::Mkdir { uri, description } => handlers::handle_mkdir(uri, description, ctx).await, - Commands::Rm { uri, recursive } => handlers::handle_rm(uri, recursive, ctx).await, + Commands::Rm { + uri, + recursive, + wait, + timeout, + } => handlers::handle_rm(uri, recursive, wait, timeout, ctx).await, Commands::Mv { from_uri, to_uri } => handlers::handle_mv(from_uri, to_uri, ctx).await, Commands::Stat { uri } => handlers::handle_stat(uri, ctx).await, Commands::AddMemory { content } => handlers::handle_add_memory(content, ctx).await, diff --git a/crates/ov_cli/src/tui/app.rs b/crates/ov_cli/src/tui/app.rs index 43f99051a0..fd0e0f0fb1 100644 --- a/crates/ov_cli/src/tui/app.rs +++ b/crates/ov_cli/src/tui/app.rs @@ -624,7 +624,7 @@ impl App { } }; - match client.rm(&selected_uri, is_dir).await { + match client.rm(&selected_uri, is_dir, false, None).await { Ok(_) => { self.set_status_message(format!("Deleted: {}", selected_uri)); diff --git a/crates/ragfs/src/plugins/s3fs/client.rs b/crates/ragfs/src/plugins/s3fs/client.rs index 76d122e46a..d79df53952 100644 --- a/crates/ragfs/src/plugins/s3fs/client.rs +++ b/crates/ragfs/src/plugins/s3fs/client.rs @@ -4,9 +4,9 @@ //! Supports AWS S3 and S3-compatible services (MinIO, LocalStack, TOS). 
use crate::core::{ConfigValue, Error, Result}; -use aws_sdk_s3::error::SdkError; use aws_sdk_s3::config::{BehaviorVersion, Credentials, Region}; use aws_sdk_s3::error::ProvideErrorMetadata; +use aws_sdk_s3::error::SdkError; use aws_sdk_s3::operation::{RequestId, RequestIdExt}; use aws_sdk_s3::primitives::ByteStream; use aws_sdk_s3::Client; @@ -526,7 +526,11 @@ impl S3Client { .send() .await .map_err(|e| { - format_sdk_s3_error("DeleteObject", &format!("bucket={} key={key}", self.bucket), &e) + format_sdk_s3_error( + "DeleteObject", + &format!("bucket={} key={key}", self.bucket), + &e, + ) })?; Ok(()) @@ -725,16 +729,13 @@ impl S3Client { req = req.continuation_token(token); } - let resp = req - .send() - .await - .map_err(|e| { - format_sdk_s3_error( - "ListObjectsV2", - &format!("bucket={} prefix={prefix}", self.bucket), - &e, - ) - })?; + let resp = req.send().await.map_err(|e| { + format_sdk_s3_error( + "ListObjectsV2", + &format!("bucket={} prefix={prefix}", self.bucket), + &e, + ) + })?; for obj in resp.contents() { let key = obj.key().unwrap_or(""); @@ -1048,12 +1049,8 @@ mod tests { #[test] fn test_format_generic_s3_error_includes_operation_bucket_key_and_raw_error() { - let err = format_generic_s3_error( - "PutObject", - "test-bucket", - "tenant/a.txt", - "service error", - ); + let err = + format_generic_s3_error("PutObject", "test-bucket", "tenant/a.txt", "service error"); match err { Error::Internal(message) => { diff --git a/docs/en/api/02-resources.md b/docs/en/api/02-resources.md index 8c5e2fe13e..e890bc389f 100644 --- a/docs/en/api/02-resources.md +++ b/docs/en/api/02-resources.md @@ -126,7 +126,8 @@ This endpoint is the core entry point for resource management, supporting adding 3. Call the corresponding Parser to parse content 4. Build the directory tree and write to AGFS 5. Wait for semantic processing completion when `wait=true`; with `wait=false`, return a `task_id` for queue tracking -6. 
Set up scheduled update task if `watch_interval` is specified +6. If `reason` is non-empty, append it to the fixed resource reason session and commit through the normal memory extraction pipeline so suitable user memories can reference the resource URI +7. Set up scheduled update task if `watch_interval` is specified **Code Entry Points**: - `openviking/client/local.py:LocalClient.add_resource` - SDK entry (embedded) @@ -146,7 +147,7 @@ This endpoint is the core entry point for resource management, supporting adding | to | string | No | - | Target Viking URI (exact location). Mutually exclusive with `parent` | | parent | string | No | - | Parent Viking URI (resource placed under this directory). Mutually exclusive with `to` | | create_parent | bool | No | False | Automatically create parent directory if it does not exist (server-side flag) | -| reason | string | No | "" | Reason for adding the resource (for documentation and relevance improvement, experimental feature) | +| reason | string | No | "" | Reason for adding the resource. When non-empty, OpenViking runs it through the normal session memory extraction pipeline with the resource URI and records resource references in the resulting memory | | instruction | string | No | "" | Processing instructions for semantic extraction (experimental feature) | | wait | bool | No | False | Whether to wait for semantic processing and vectorization to complete before returning | | timeout | float | No | None | Timeout in seconds, only effective when `wait=True` | @@ -168,6 +169,8 @@ This endpoint is the core entry point for resource management, supporting adding - Raw HTTP calls for local files require first uploading via [temp_upload](#temp_upload) to obtain `temp_file_id` - When `to` is specified and the target already exists, triggers incremental update - Only Git repository sources use full background import when `wait=false`; OpenViking performs repository preflight and target planning before returning the `task_id`. 
+- Memory generated from `reason` is extracted through the same pipeline as `session.commit`. It uses `reason`, the resource URI, available source name, and available directory abstract; it does not inspect or expand the full resource content. OpenViking writes to existing memory types such as `entities`, `events`, or `preferences`, not a dedicated resource memory directory. +- When deleting a resource, OpenViking scans the self or peer memories targeted by the current context before deletion, removes the matching resource URI and content introduced by that `reason`, and refreshes the semantic index for the affected memories. - Other sources with `wait=false` finish source parsing, target resolution, and AGFS writes before returning. Only semantic and embedding queues continue asynchronously. - When `watch_interval > 0`, the watch task binds to `to` if provided; otherwise it binds to the `root_uri` returned by this import. If no stable `root_uri` is available, the request fails and asks for an explicit `to`. - Feishu/Lark app-token imports do not pass `args.feishu_access_token`. OpenViking keeps the existing app credential flow and the SDK obtains an app/tenant token from `app_id` and `app_secret`. This mode supports both one-time imports and `watch_interval > 0`. diff --git a/docs/en/api/03-filesystem.md b/docs/en/api/03-filesystem.md index ec9a2724eb..cce6170b11 100644 --- a/docs/en/api/03-filesystem.md +++ b/docs/en/api/03-filesystem.md @@ -634,6 +634,8 @@ openviking rm viking://resources/old.md [--recursive] The `estimated_deleted_count` field (for recursive deletes) contains the estimated number of items (files and directories) deleted (from vector index). The CLI will display this information in output. +When deleting `viking://resources/...`, the response may include `memory_cleanup`, indicating that user memories referencing that resource URI were cleaned up before deletion. 
+ --- ### mv() diff --git a/docs/en/api/05-sessions.md b/docs/en/api/05-sessions.md index e2057cea73..10ac3f29c3 100644 --- a/docs/en/api/05-sessions.md +++ b/docs/en/api/05-sessions.md @@ -932,6 +932,7 @@ Commit a session. Message archiving (Phase 1) completes immediately. Summary gen - Rapid consecutive commits on the same session are accepted; each request gets its own `task_id`. - Background Phase 2 work is serialized by archive order: archive `N+1` waits until archive `N` writes `.done`. - If an earlier archive failed and left no `.done`, later commit requests fail with `FAILED_PRECONDITION` until that failure is resolved. +- If committed messages contain durable facts, judgments, preferences, or events that mention `viking://resources/...`, memory extraction preserves the resource as a markdown link and records it in `MEMORY_FIELDS.resource_refs`. **Code Entries:** - `openviking/session/session.py:Session.commit_async()` - Core implementation diff --git a/docs/zh/api/02-resources.md b/docs/zh/api/02-resources.md index 43f6e12a77..799ba46e6e 100644 --- a/docs/zh/api/02-resources.md +++ b/docs/zh/api/02-resources.md @@ -121,7 +121,8 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector 3. 调用对应 Parser 解析内容 4. 构建目录树并写入 AGFS 5. `wait=true` 时等待语义处理完成;`wait=false` 时返回 `task_id` 用于队列跟踪 -6. 如指定 `--watch-interval`,设置定时更新任务 +6. 如果 `reason` 非空,将其追加到固定的资源 reason session 并 commit,复用常规记忆抽取链路,让合适的用户记忆引用该资源 URI +7. 
如指定 `--watch-interval`,设置定时更新任务 **代码入口**: - `openviking/client/local.py:LocalClient.add_resource` - SDK 入口(嵌入式) @@ -141,7 +142,7 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector | to | string | 否 | - | 目标 Viking URI(精确位置)。与 `parent` 互斥 | | parent | string | 否 | - | 父级 Viking URI(资源放入此目录下)。与 `to` 互斥 | | create_parent | bool | 否 | False | 如果父目录不存在,自动创建父目录(服务端标志) | -| reason | string | 否 | "" | 添加资源的原因(用于文档化和相关性提升,实验特性) | +| reason | string | 否 | "" | 添加资源的原因;非空时会随资源 URI 进入常规 session 记忆抽取链路,并在生成的记忆中记录资源引用 | | instruction | string | 否 | "" | 语义提取的处理指令(实验特性) | | wait | bool | 否 | False | 是否等待语义处理和向量化完成才返回 | | timeout | float | 否 | None | 超时时间(秒),仅 `wait=true` 时生效 | @@ -161,6 +162,8 @@ URL/文件 Parser TreeBuilder AGFS Summarizer/Vector - `user_id` 和 `peer_id` 路径片段必须是安全的单段标识,例如 `alice` 或 `web-visitor-alice`。包含路径分隔符、`.`、`..`、`:` 或 `+` 的值会被拒绝。 - `path` 和 `temp_file_id` 不能同时指定,上传本地文件需要先通过 [temp_upload](#temp_upload) 上传获取 `temp_file_id`,在 SDK 和 CLI 中已经封装好。 - 只有 Git 仓库来源在 `wait=false` 时使用完整后台导入;OpenViking 会先完成仓库 preflight 和目标规划,再返回 `task_id`。 +- `reason` 触发的记忆生成复用 `session.commit` 的抽取链路,只使用 `reason`、资源 URI、可用的资源名称和目录摘要,不会读取或展开完整资源正文;系统会写入 `entities`、`events`、`preferences` 等已有记忆类型,不创建独立的资源记忆目录。 +- 删除资源时,系统会在删除前扫描本次上下文对应的 self 或 peer 记忆中的 `resource_refs`,清理对应资源 URI 和由该 `reason` 引入的内容,并重新刷新相关记忆的语义索引。 - 其他来源在 `wait=false` 时会在响应前完成来源解析、目标解析和 AGFS 写入,仅 semantic 与 embedding 队列继续异步处理。 - `watch_interval > 0` 时,如果指定了 `to`,监控任务绑定该目标;如果未指定 `to`,监控任务绑定本次导入返回的 `root_uri`。如果无法得到稳定 `root_uri`,请求会报错并要求显式传 `to`。 - 飞书/Lark 应用 token 导入不传 `args.feishu_access_token`。OpenViking 保持原有应用凭证流程,由 SDK 使用 `app_id` 和 `app_secret` 自动获取 app/tenant token。该模式支持一次性导入和 `watch_interval > 0`。 @@ -434,6 +437,7 @@ task_id uuid-xxx | `errors` | array | 处理过程中的错误列表 | | `warnings` | array | (可选)处理过程中的警告列表(仅在 `strict=False` 时可能出现) | | `queue_status` | object | (可选,仅当 `wait=true` 时)队列处理状态,包含 `pending`、`processing`、`completed` 计数 | +| `memory_linking` | object | (可选,仅当 `reason` 触发记忆生成时)本次资源 URI 与用户记忆的关联结果 | 对于 `wait=false` 
的 Git 仓库来源,后台任务的 `task_type="add_resource"`,`resource_id` 等于返回的 `root_uri`。运行中的任务记录可能包含 `stage`;完成后的任务 `result` 会包含带有 semantic 和 embedding 汇总的 `queue_status`。 diff --git a/docs/zh/api/03-filesystem.md b/docs/zh/api/03-filesystem.md index fd7485f4db..67e801880a 100644 --- a/docs/zh/api/03-filesystem.md +++ b/docs/zh/api/03-filesystem.md @@ -635,6 +635,8 @@ openviking rm viking://resources/old.md [--recursive] `estimated_deleted_count` 字段(递归删除时)包含删除的项目(文件和目录)估计数量(来自向量索引)。CLI 会在输出中显示此信息。 +删除 `viking://resources/...` 时,响应可能包含 `memory_cleanup`,表示删除前已清理引用该资源 URI 的用户记忆。 + --- ### mv() diff --git a/docs/zh/api/05-sessions.md b/docs/zh/api/05-sessions.md index 5e531b7454..12b6f54d5e 100644 --- a/docs/zh/api/05-sessions.md +++ b/docs/zh/api/05-sessions.md @@ -906,6 +906,7 @@ await client.session_used( - 同一 session 的多次快速连续 commit 会被接受;每次请求都会拿到独立的 `task_id` - 后台 Phase 2 会按 archive 顺序串行推进:`archive_N+1` 会等待 `archive_N` 写出 `.done` 后再继续 - 如果更早的 archive 已失败且没有 `.done`,后续 commit 会直接返回错误,直到该失败被处理 +- 如果提交的消息中包含带 `viking://resources/...` 的长期事实、评价、偏好或事件,记忆抽取会把资源保留为 markdown 链接,并写入 `MEMORY_FIELDS.resource_refs` **代码入口**: - `openviking/session/session.py:Session.commit_async()` - 核心实现 diff --git a/openviking/async_client.py b/openviking/async_client.py index c4365171bf..af79e85743 100644 --- a/openviking/async_client.py +++ b/openviking/async_client.py @@ -503,10 +503,16 @@ async def ls(self, uri: str, **kwargs) -> List[Any]: show_all_hidden=show_all_hidden, ) - async def rm(self, uri: str, recursive: bool = False) -> None: + async def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource""" await self._ensure_initialized() - await self._client.rm(uri, recursive=recursive) + await self._client.rm(uri, recursive=recursive, wait=wait, timeout=timeout) async def grep( self, diff --git a/openviking/client/local.py b/openviking/client/local.py index 2c59252849..dcd0732ba2 100644 --- 
a/openviking/client/local.py +++ b/openviking/client/local.py @@ -248,9 +248,21 @@ async def mkdir(self, uri: str, description: Optional[str] = None) -> None: """Create directory.""" await self._service.fs.mkdir(uri, ctx=self._ctx, description=description) - async def rm(self, uri: str, recursive: bool = False) -> None: + async def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource.""" - await self._service.fs.rm(uri, ctx=self._ctx, recursive=recursive) + await self._service.fs.rm( + uri, + ctx=self._ctx, + recursive=recursive, + wait=wait, + timeout=timeout, + ) async def mv(self, from_uri: str, to_uri: str) -> None: """Move resource.""" diff --git a/openviking/prompts/templates/memory/events.yaml b/openviking/prompts/templates/memory/events.yaml index a66e916e1a..1cb6c1f9ff 100644 --- a/openviking/prompts/templates/memory/events.yaml +++ b/openviking/prompts/templates/memory/events.yaml @@ -93,9 +93,14 @@ enabled: true # upsert 表示新增或更新(默认行为) operation_mode: "add_only" content_template: | + {% set resource_event_content = extract_context.get_resource_event_content(ranges, summary) %} + {% if resource_event_content %} + {{ resource_event_content }} + {% else %} Summary: {{ summary }} {{extract_context.get_first_message_time_with_weekday_from_ranges(ranges|default(''))|default('N/A')}} ChatLog: {{ extract_context.get_event_content(ranges, summary, 0) }} + {% endif %} embedding_template: |- EventName: {{ event_name }} Goal: {{ goal }} diff --git a/openviking/server/routers/filesystem.py b/openviking/server/routers/filesystem.py index 474dd218fd..210d560131 100644 --- a/openviking/server/routers/filesystem.py +++ b/openviking/server/routers/filesystem.py @@ -149,6 +149,8 @@ async def mkdir( async def rm( uri: str = Query(..., description="Viking URI"), recursive: bool = Query(False, description="Remove recursively"), + wait: bool = Query(False, description="Wait for semantic 
refresh to complete"), + timeout: Optional[float] = Query(None, description="Wait timeout in seconds"), _ctx: RequestContext = Depends(get_request_context), ): """Remove resource.""" @@ -156,7 +158,7 @@ async def rm( # Resolve path variables uri = resolve_path_variables(uri) try: - result = await service.fs.rm(uri, ctx=_ctx, recursive=recursive) + result = await service.fs.rm(uri, ctx=_ctx, recursive=recursive, wait=wait, timeout=timeout) except AGFSNotFoundError: raise NotFoundError(uri, "file") except AGFSClientError as e: @@ -173,6 +175,14 @@ async def rm( response_result = {"uri": uri} if isinstance(result, dict) and "estimated_deleted_count" in result: response_result["estimated_deleted_count"] = result["estimated_deleted_count"] + if isinstance(result, dict) and "memory_cleanup" in result: + response_result["memory_cleanup"] = result["memory_cleanup"] + if isinstance(result, dict) and "semantic_root_uri" in result: + response_result["semantic_root_uri"] = result["semantic_root_uri"] + if isinstance(result, dict) and "semantic_status" in result: + response_result["semantic_status"] = result["semantic_status"] + if isinstance(result, dict) and "queue_status" in result: + response_result["queue_status"] = result["queue_status"] return Response(status="ok", result=response_result) diff --git a/openviking/service/core.py b/openviking/service/core.py index 80697ea60c..9174d65a87 100644 --- a/openviking/service/core.py +++ b/openviking/service/core.py @@ -20,6 +20,7 @@ from openviking.service.fs_service import FSService from openviking.service.pack_service import PackService from openviking.service.relation_service import RelationService +from openviking.service.resource_memory_link_service import ResourceMemoryLinkService from openviking.service.resource_service import ResourceService from openviking.service.search_service import SearchService from openviking.service.session_service import SessionService @@ -119,6 +120,7 @@ def __init__( self._relation_service = 
RelationService() self._pack_service = PackService() self._search_service = SearchService() + self._resource_memory_link_service = ResourceMemoryLinkService() self._resource_service = ResourceService() self._session_service = SessionService() self._debug_service = DebugService() @@ -414,7 +416,14 @@ async def initialize(self) -> None: # Wire up sub-services self._fs_service.set_dependencies( viking_fs=self._viking_fs, + vikingdb=self._vikingdb_manager, privacy_config_service=self._privacy_config_service, + resource_memory_link_service=self._resource_memory_link_service, + ) + self._resource_memory_link_service.set_dependencies( + vikingdb=self._vikingdb_manager, + viking_fs=self._viking_fs, + session_service=self._session_service, ) self._relation_service.set_viking_fs(self._viking_fs) self._pack_service.set_dependencies( @@ -428,6 +437,7 @@ async def initialize(self) -> None: resource_processor=self._resource_processor, skill_processor=self._skill_processor, watch_scheduler=self._watch_scheduler, + resource_memory_link_service=self._resource_memory_link_service, ) self._session_service.set_dependencies( vikingdb=self._vikingdb_manager, diff --git a/openviking/service/fs_service.py b/openviking/service/fs_service.py index c0cb47446c..e02ddfb722 100644 --- a/openviking/service/fs_service.py +++ b/openviking/service/fs_service.py @@ -6,7 +6,7 @@ Provides file system operations: ls, mkdir, rm, mv, tree, stat, read, abstract, overview, grep, glob. 
""" -from typing import Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional from openviking.core.namespace import context_type_for_uri from openviking.core.uri_validation import validate_optional_viking_uri, validate_viking_uri @@ -16,14 +16,24 @@ restore_skill_content, ) from openviking.server.identity import RequestContext +from openviking.session.memory.memory_updater import MemoryUpdater from openviking.storage.content_write import ContentWriteCoordinator +from openviking.storage.queuefs import SemanticMsg, get_queue_manager +from openviking.storage.queuefs.semantic_msg import build_semantic_coalesce_key from openviking.storage.viking_fs import VikingFS +from openviking.telemetry import get_current_telemetry +from openviking.telemetry.request_wait_tracker import get_request_wait_tracker +from openviking.telemetry.resource_summary import build_queue_status_payload from openviking.utils.embedding_utils import vectorize_directory_meta -from openviking_cli.exceptions import NotInitializedError +from openviking_cli.exceptions import DeadlineExceededError, NotInitializedError from openviking_cli.utils import VikingURI, get_logger logger = get_logger(__name__) +if TYPE_CHECKING: + from openviking.service.resource_memory_link_service import ResourceMemoryLinkService + from openviking.storage import VikingDBManager + class FSService: """File system operations service.""" @@ -31,19 +41,27 @@ class FSService: def __init__( self, viking_fs: Optional[VikingFS] = None, + vikingdb: Optional["VikingDBManager"] = None, privacy_config_service: Optional[UserPrivacyConfigService] = None, + resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ): self._viking_fs = viking_fs + self._vikingdb = vikingdb self._privacy_config_service = privacy_config_service + self._resource_memory_link_service = resource_memory_link_service def set_dependencies( self, viking_fs: VikingFS, + vikingdb: Optional["VikingDBManager"] = None, 
privacy_config_service: Optional[UserPrivacyConfigService] = None, + resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ) -> None: """Set service dependencies (for deferred initialization).""" self._viking_fs = viking_fs + self._vikingdb = vikingdb self._privacy_config_service = privacy_config_service + self._resource_memory_link_service = resource_memory_link_service def _ensure_initialized(self) -> VikingFS: """Ensure VikingFS is initialized.""" @@ -158,12 +176,192 @@ def _resolve_directory_uris(uri: str) -> tuple[str, str]: return directory_uri, abstract_uri async def rm( - self, uri: str, ctx: RequestContext, recursive: bool = False + self, + uri: str, + ctx: RequestContext, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, ) -> Optional[Dict[str, Any]]: """Remove resource.""" uri = validate_viking_uri(uri) viking_fs = self._ensure_initialized() - return await viking_fs.rm(uri, recursive=recursive, ctx=ctx) + cleanup_result: Optional[Dict[str, Any]] = None + context_type = context_type_for_uri(uri) + refresh_parent_uri = self._semantic_refresh_parent_uri(uri, context_type) + memory_overview_uri = self._memory_overview_parent_uri(uri, context_type) + result = await viking_fs.rm(uri, recursive=recursive, ctx=ctx) + queue_status = None + request_registered = False + telemetry_id = get_current_telemetry().telemetry_id + try: + if refresh_parent_uri: + if wait and telemetry_id: + get_request_wait_tracker().register_request(telemetry_id) + request_registered = True + await self._enqueue_delete_refresh( + root_uri=refresh_parent_uri, + deleted_uri=uri, + context_type=context_type, + ctx=ctx, + ) + if self._resource_memory_link_service and context_type == "resource": + cleanup_result = await self._resource_memory_link_service.before_resource_delete( + ctx=ctx, + resource_uri=uri, + recursive=recursive, + ) + if memory_overview_uri: + await MemoryUpdater.refresh_schema_overview( + viking_fs=viking_fs, + 
directory_uri=memory_overview_uri, + ctx=ctx, + ) + for cleanup_overview_uri in self._memory_overview_parent_uris_from_cleanup( + cleanup_result + ): + await MemoryUpdater.refresh_schema_overview( + viking_fs=viking_fs, + directory_uri=cleanup_overview_uri, + ctx=ctx, + ) + if refresh_parent_uri and wait: + queue_status = await self._wait_for_refresh(timeout=timeout) + finally: + if request_registered: + get_request_wait_tracker().cleanup(telemetry_id) + if cleanup_result is not None and isinstance(result, dict): + result["memory_cleanup"] = cleanup_result + if refresh_parent_uri and isinstance(result, dict): + result["semantic_root_uri"] = refresh_parent_uri + result["semantic_status"] = self._semantic_refresh_status( + wait=wait, + queue_status=queue_status, + ) + if queue_status is not None: + result["queue_status"] = queue_status + return result + + @staticmethod + def _semantic_refresh_status( + *, + wait: bool, + queue_status: Optional[Dict[str, Any]], + ) -> str: + if not wait: + return "queued" + if not isinstance(queue_status, dict): + return "complete" + semantic = queue_status.get("Semantic", {}) + if not isinstance(semantic, dict): + return "complete" + try: + if int(semantic.get("error_count", 0) or 0) > 0: + return "failed" + except (TypeError, ValueError): + if semantic.get("errors"): + return "failed" + if semantic.get("errors"): + return "failed" + return "complete" + + @staticmethod + def _semantic_refresh_parent_uri(uri: str, context_type: str) -> Optional[str]: + if context_type != "resource": + return None + parent = VikingURI(uri).parent + return parent.uri if parent else None + + @staticmethod + def _memory_overview_parent_uri(uri: str, context_type: str) -> Optional[str]: + if context_type != "memory": + return None + leaf = uri.rstrip("/").rsplit("/", 1)[-1] + if leaf in {".abstract.md", ".overview.md", ".relations.json"}: + return None + parent = VikingURI(uri).parent + if parent is None: + return None + if not 
MemoryUpdater.memory_type_from_uri(parent.uri): + return None + return parent.uri + + @classmethod + def _memory_overview_parent_uris_from_cleanup( + cls, + cleanup_result: Optional[Dict[str, Any]], + ) -> List[str]: + if not isinstance(cleanup_result, dict): + return [] + + overview_uris: List[str] = [] + for field in ("memory_uris", "deleted_memory_uris"): + values = cleanup_result.get(field) + if not isinstance(values, list): + continue + for memory_uri in values: + if not isinstance(memory_uri, str): + continue + overview_uri = cls._memory_overview_parent_uri( + memory_uri, + context_type_for_uri(memory_uri), + ) + if overview_uri: + overview_uris.append(overview_uri) + return list(dict.fromkeys(overview_uris)) + + async def _enqueue_delete_refresh( + self, + *, + root_uri: str, + deleted_uri: str, + context_type: str, + ctx: RequestContext, + ) -> None: + queue_manager = get_queue_manager() + semantic_queue = queue_manager.get_queue(queue_manager.SEMANTIC, allow_create=True) + telemetry_id = get_current_telemetry().telemetry_id + msg = SemanticMsg( + uri=root_uri, + context_type=context_type, + account_id=ctx.account_id, + user_id=ctx.user.user_id, + peer_id=ctx.user.user_id, + role=ctx.role.value, + skip_vectorization=False, + telemetry_id=telemetry_id, + coalesce_key=build_semantic_coalesce_key( + context_type=context_type, + uri=root_uri, + account_id=ctx.account_id, + user_id=ctx.user.user_id, + peer_id=ctx.user.user_id, + ), + changes={"deleted": [deleted_uri]}, + ) + if telemetry_id: + get_request_wait_tracker().register_semantic_root(telemetry_id, msg.id) + try: + await semantic_queue.enqueue(msg) + except Exception as exc: + if telemetry_id: + get_request_wait_tracker().mark_semantic_failed(telemetry_id, msg.id, str(exc)) + raise + + async def _wait_for_refresh(self, *, timeout: Optional[float]) -> Dict[str, Any]: + telemetry_id = get_current_telemetry().telemetry_id + if telemetry_id: + try: + await 
get_request_wait_tracker().wait_for_request(telemetry_id, timeout=timeout) + except TimeoutError as exc: + raise DeadlineExceededError("queue processing", timeout) from exc + return get_request_wait_tracker().build_queue_status(telemetry_id) + try: + return build_queue_status_payload( + await get_queue_manager().wait_complete(timeout=timeout) + ) + except TimeoutError as exc: + raise DeadlineExceededError("queue processing", timeout) from exc async def mv(self, from_uri: str, to_uri: str, ctx: RequestContext) -> None: """Move resource.""" @@ -300,7 +498,7 @@ async def write( """Write to an existing file and refresh semantics/vectors.""" uri = validate_viking_uri(uri) viking_fs = self._ensure_initialized() - coordinator = ContentWriteCoordinator(viking_fs=viking_fs) + coordinator = ContentWriteCoordinator(viking_fs=viking_fs, vikingdb=self._vikingdb) return await coordinator.write( uri=uri, content=content, diff --git a/openviking/service/resource_memory_link_service.py b/openviking/service/resource_memory_link_service.py new file mode 100644 index 0000000000..07a311ff51 --- /dev/null +++ b/openviking/service/resource_memory_link_service.py @@ -0,0 +1,567 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""Link resource addition reasons to user memories. + +This module keeps resource files immutable: all traceability lives in memory +files' MEMORY_FIELDS metadata. 
+""" + +from __future__ import annotations + +import asyncio +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence + +from openviking.core.namespace import ( + NamespaceShapeError, + canonical_user_root, + canonicalize_uri, + context_type_for_uri, + uri_parts, +) +from openviking.core.peer_id import normalize_peer_id +from openviking.message.part import TextPart +from openviking.server.identity import RequestContext +from openviking.session.memory.dataclass import MemoryFile +from openviking.session.memory.memory_updater import ( + MemoryUpdater, + MemoryUpdateResult, +) +from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking.session.memory.utils.resource_refs import ( + content_references_resource, + extract_resource_uris, + remove_resource_references_from_memory, + resource_ref_matches, +) +from openviking.storage import VikingDBManager +from openviking.storage.viking_fs import VikingFS, get_viking_fs +from openviking_cli.exceptions import NotFoundError +from openviking_cli.utils import get_logger + +if TYPE_CHECKING: + from openviking.service.session_service import SessionService + +logger = get_logger(__name__) + +_RESOURCE_REASON_SESSION_ID = "__openviking_resource_reason__" +_RESOURCE_REASON_MEMORY_TYPES = ["entities", "events", "preferences"] +_RESOURCE_REASON_COMMIT_TIMEOUT_SECONDS = 1800.0 +_RESOURCE_ABSTRACT_MAX_CHARS = 200 +_ABSTRACT_NOT_READY_MARKERS = ( + "[.abstract.md is not ready]", + "[Directory abstract is not ready]", +) + + +def _resource_reason_memory_policy(target_peer_id: Optional[str] = None) -> Dict[str, Any]: + peer_targeted = bool(target_peer_id) + return { + "self": {"enabled": not peer_targeted}, + "peer": {"enabled": peer_targeted}, + "memory_types": list(_RESOURCE_REASON_MEMORY_TYPES), + } + + +def _resource_reason_peer_id(ctx: RequestContext, resource_uri: str) -> Optional[str]: + actor_peer_id = 
normalize_peer_id(ctx.actor_peer_id) + if actor_peer_id: + return actor_peer_id + return _peer_id_from_resource_uri(resource_uri, ctx) + + +def _peer_id_from_resource_uri(resource_uri: str, ctx: RequestContext) -> Optional[str]: + try: + parts = uri_parts(canonicalize_uri(resource_uri, ctx)) + except (NamespaceShapeError, ValueError): + return None + if len(parts) >= 5 and parts[0] == "user" and parts[2] == "peers": + return normalize_peer_id(parts[3]) + return None + + +def _memory_roots_for_resource_refs(ctx: RequestContext, resource_uri: str) -> List[str]: + user_root = canonical_user_root(ctx) + target_peer_id = _resource_reason_peer_id(ctx, resource_uri) + if target_peer_id: + return [f"{user_root}/peers/{target_peer_id}/memories"] + return [f"{user_root}/memories"] + + +@dataclass +class _MemoryRefMatch: + memory_uri: str + memory_file: MemoryFile + resource_ref: Dict[str, Any] + + +class ResourceMemoryLinkService: + """Create and clean memory references for resources added with a reason.""" + + def __init__( + self, + *, + vikingdb: Optional[VikingDBManager] = None, + viking_fs: Optional[VikingFS] = None, + session_service: Optional["SessionService"] = None, + ): + self._vikingdb = vikingdb + self._viking_fs = viking_fs + self._session_service = session_service + self._reason_session_lock = asyncio.Lock() + + def set_dependencies( + self, + *, + vikingdb: Optional[VikingDBManager], + viking_fs: VikingFS, + session_service: Optional["SessionService"] = None, + ) -> None: + self._vikingdb = vikingdb + self._viking_fs = viking_fs + if session_service is not None: + self._session_service = session_service + + def _get_viking_fs(self) -> VikingFS: + return self._viking_fs or get_viking_fs() + + async def on_resource_added( + self, + *, + ctx: RequestContext, + resource_uri: str, + reason: str, + source_name: Optional[str] = None, + timeout: Optional[float] = None, + ) -> Dict[str, Any]: + """Bridge add-resource reason extraction through normal session commit.""" 
+ reason = (reason or "").strip() + if not reason: + return {"status": "skipped", "reason": "empty_reason"} + if not resource_uri: + return {"status": "skipped", "reason": "empty_resource_uri"} + if not self._session_service: + return {"status": "skipped", "reason": "session_service_unavailable"} + + added_at = datetime.now(timezone.utc).isoformat() + resource_abstract = await self._read_resource_directory_abstract(resource_uri, ctx) + session_id = _RESOURCE_REASON_SESSION_ID + target_peer_id = _resource_reason_peer_id(ctx, resource_uri) + commit_result: Dict[str, Any] = {} + task_result: Optional[Dict[str, Any]] = None + + async with self._reason_session_lock: + session = await self._session_service.get( + session_id, + ctx, + auto_create=True, + ) + session.meta.memory_policy = _resource_reason_memory_policy(target_peer_id) + message_spec: Dict[str, Any] = { + "role": "user", + "parts": [ + TextPart( + text=self._build_resource_addition_message( + resource_uri=resource_uri, + reason=reason, + source_name=source_name, + added_at=added_at, + resource_abstract=resource_abstract, + ) + ) + ], + "created_at": added_at, + } + if target_peer_id: + message_spec["peer_id"] = target_peer_id + session.add_messages([message_spec]) + commit_result = await self._session_service.commit_async( + session_id, + ctx, + keep_recent_count=0, + ) + + task_id = commit_result.get("task_id") + if task_id: + task_result = await self._wait_for_commit_task( + task_id=str(task_id), + ctx=ctx, + timeout=timeout, + ) + return { + "status": "success", + "session_id": session_id, + "commit_task_id": task_id, + "archive_uri": commit_result.get("archive_uri"), + "commit_task": task_result, + } + + @staticmethod + def _build_resource_addition_message( + *, + resource_uri: str, + reason: str, + source_name: Optional[str], + added_at: str, + resource_abstract: str, + ) -> str: + return ( + "## Resource Addition\n" + f"Resource URI: {resource_uri}\n" + f"Source name: {source_name or 'N/A'}\n" + 
f"Added at: {added_at or 'N/A'}\n" + f"Resource abstract: {resource_abstract or 'N/A'}\n" + f"User reason: {reason}" + ) + + async def _wait_for_commit_task( + self, + *, + task_id: str, + ctx: RequestContext, + timeout: Optional[float], + ) -> Dict[str, Any]: + from openviking.service.task_tracker import get_task_tracker + + async def _poll() -> Dict[str, Any]: + tracker = get_task_tracker() + while True: + task = await tracker.get( + task_id, + account_id=ctx.account_id, + user_id=ctx.user.user_id, + ) + if task is None: + raise RuntimeError(f"session commit task not found: {task_id}") + status = task.status.value if hasattr(task.status, "value") else str(task.status) + if status == "completed": + return task.to_dict() + if status == "failed": + raise RuntimeError(task.error or f"session commit task failed: {task_id}") + await asyncio.sleep(0.1) + + return await asyncio.wait_for( + _poll(), + timeout=timeout or _RESOURCE_REASON_COMMIT_TIMEOUT_SECONDS, + ) + + async def before_resource_delete( + self, + *, + ctx: RequestContext, + resource_uri: str, + recursive: bool = False, + ) -> Dict[str, Any]: + """Remove references from user memories before deleting a resource.""" + if context_type_for_uri(resource_uri) != "resource": + return {"status": "skipped", "reason": "not_resource"} + + matches = await self._find_referencing_memories( + ctx=ctx, + resource_uri=resource_uri, + recursive=recursive, + ) + if not matches: + return {"status": "no_references", "memory_uris": []} + + cleaned: List[str] = [] + deleted: List[str] = [] + errors: List[str] = [] + grouped = self._group_matches_by_memory(matches) + for memory_uri, memory_matches in grouped.items(): + first = memory_matches[0] + reason = str(first.resource_ref.get("reason") or "") + try: + cleanup_result = await self._cleanup_memory_reference( + ctx=ctx, + memory_uri=memory_uri, + memory_file=first.memory_file, + resource_uri=resource_uri, + reason=reason, + recursive=recursive, + ) + 
cleaned.extend(cleanup_result.written_uris + cleanup_result.edited_uris) + deleted.extend(cleanup_result.deleted_uris) + if memory_uri in cleanup_result.deleted_uris: + continue + if not cleanup_result.has_changes(): + await self._remove_resource_refs( + memory_uri, + resource_uri, + ctx, + recursive=recursive, + ) + cleaned.append(memory_uri) + await self._assert_resource_unlinked( + memory_uri, + resource_uri, + ctx, + recursive=recursive, + ) + except NotFoundError: + deleted.append(memory_uri) + except Exception as exc: + errors.append(f"{memory_uri}: {exc}") + + if errors: + raise RuntimeError( + "resource memory cleanup failed before deleting resource: " + "; ".join(errors) + ) + return { + "status": "success", + "memory_uris": list(dict.fromkeys(cleaned)), + "deleted_memory_uris": list(dict.fromkeys(deleted)), + } + + async def _cleanup_memory_reference( + self, + *, + ctx: RequestContext, + memory_uri: str, + memory_file: MemoryFile, + resource_uri: str, + reason: str, + recursive: bool = False, + ) -> MemoryUpdateResult: + del reason + viking_fs = self._get_viking_fs() + current = memory_file + try: + raw = await viking_fs.read_file(memory_uri, ctx=ctx) + current = MemoryFileUtils.read(raw, uri=memory_uri) + except (NotFoundError, FileNotFoundError): + result = MemoryUpdateResult() + result.add_deleted(memory_uri) + return result + + changed = remove_resource_references_from_memory( + current, + resource_uri, + recursive=recursive, + ) + result = MemoryUpdateResult() + if not changed: + return result + + await viking_fs.write_file(memory_uri, MemoryFileUtils.write(current), ctx=ctx) + result.add_edited(memory_uri) + if await self._delete_empty_cleanup_memory(memory_uri, ctx): + self._mark_result_deleted(result, memory_uri) + return result + + async def _delete_empty_cleanup_memory(self, memory_uri: str, ctx: RequestContext) -> bool: + """Delete memory files whose visible content was emptied by resource cleanup.""" + if context_type_for_uri(memory_uri) != 
"memory": + return False + viking_fs = self._get_viking_fs() + try: + raw = await viking_fs.read_file(memory_uri, ctx=ctx) + except (NotFoundError, FileNotFoundError): + return True + mf = MemoryFileUtils.read(raw, uri=memory_uri) + if (mf.content or "").strip(): + return False + directory_uri = memory_uri.rsplit("/", 1)[0] + await viking_fs.rm(memory_uri, recursive=False, ctx=ctx) + await MemoryUpdater.refresh_schema_overview( + viking_fs=viking_fs, + directory_uri=directory_uri, + ctx=ctx, + ) + logger.info("Deleted empty memory after resource cleanup: %s", memory_uri) + return True + + @staticmethod + def _mark_result_deleted(result: MemoryUpdateResult, uri: str) -> None: + result.written_uris = [item for item in result.written_uris if item != uri] + result.edited_uris = [item for item in result.edited_uris if item != uri] + if uri not in result.deleted_uris: + result.add_deleted(uri) + + async def _remove_resource_refs( + self, + memory_uri: str, + resource_uri: str, + ctx: RequestContext, + *, + recursive: bool, + ) -> None: + viking_fs = self._get_viking_fs() + raw = await viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(raw, uri=memory_uri) + refs = [ + ref + for ref in self._coerce_resource_refs(mf.extra_fields.get("resource_refs")) + if not self._resource_ref_matches( + ref.get("resource_uri"), + resource_uri, + recursive=recursive, + ) + ] + if refs: + mf.extra_fields["resource_refs"] = refs + else: + mf.extra_fields.pop("resource_refs", None) + await viking_fs.write_file(memory_uri, MemoryFileUtils.write(mf), ctx=ctx) + + async def _find_referencing_memories( + self, + *, + ctx: RequestContext, + resource_uri: str, + recursive: bool, + ) -> List[_MemoryRefMatch]: + viking_fs = self._get_viking_fs() + matches: List[_MemoryRefMatch] = [] + entries: List[Dict[str, Any]] = [] + for memory_root in _memory_roots_for_resource_refs(ctx, resource_uri): + try: + entries.extend( + await viking_fs.tree( + memory_root, + ctx=ctx, + 
node_limit=1000000, + level_limit=None, + ) + ) + except Exception: + continue + + for entry in entries: + uri = entry.get("uri", "") + rel_path = entry.get("rel_path", "") + if entry.get("isDir") or not uri.endswith(".md"): + continue + if rel_path.endswith("/.abstract.md") or rel_path.endswith("/.overview.md"): + continue + try: + raw = await viking_fs.read_file(uri, ctx=ctx) + mf = MemoryFileUtils.read(raw, uri=uri) + except Exception: + continue + for ref in self._coerce_resource_refs(mf.extra_fields.get("resource_refs")): + if self._resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive): + matches.append(_MemoryRefMatch(uri, mf, ref)) + if not any( + match.memory_uri == uri for match in matches + ) and content_references_resource( + mf.content, + resource_uri, + recursive=recursive, + ): + matched_uri = next( + ( + item + for item in extract_resource_uris(mf.content) + if self._resource_ref_matches(item, resource_uri, recursive) + ), + resource_uri, + ) + matches.append( + _MemoryRefMatch( + uri, + mf, + { + "resource_uri": matched_uri, + "source": "visible_content", + }, + ) + ) + return matches + + async def _read_resource_directory_abstract( + self, + resource_uri: str, + ctx: RequestContext, + ) -> str: + """Best-effort directory abstract lookup for resource-addition readability.""" + viking_fs = self._get_viking_fs() + for abstract_uri in self._resource_abstract_uri_candidates(resource_uri): + try: + abstract = await viking_fs.read_file(abstract_uri, ctx=ctx) + except Exception: + continue + abstract = self._clean_resource_abstract(abstract) + if abstract: + return abstract + return "" + + @classmethod + def _resource_abstract_uri_candidates(cls, resource_uri: str) -> List[str]: + normalized = (resource_uri or "").strip().rstrip("/") + if not normalized: + return [] + candidates = [f"{normalized}/.abstract.md"] + parent = cls._parent_uri(normalized) + if parent: + candidates.append(f"{parent}/.abstract.md") + return 
list(dict.fromkeys(candidates)) + + @staticmethod + def _parent_uri(uri: str) -> str: + scheme_index = uri.find("://") + min_slash_index = scheme_index + 3 if scheme_index >= 0 else 0 + slash_index = uri.rfind("/") + if slash_index <= min_slash_index: + return "" + return uri[:slash_index] + + @staticmethod + def _clean_resource_abstract(abstract: Any) -> str: + text = " ".join(str(abstract or "").split()) + if not text: + return "" + if any(text == marker or text.endswith(marker) for marker in _ABSTRACT_NOT_READY_MARKERS): + return "" + if len(text) > _RESOURCE_ABSTRACT_MAX_CHARS: + return text[: _RESOURCE_ABSTRACT_MAX_CHARS - 3].rstrip() + "..." + return text + + async def _assert_resource_unlinked( + self, + memory_uri: str, + resource_uri: str, + ctx: RequestContext, + *, + recursive: bool = True, + ) -> None: + try: + raw = await self._get_viking_fs().read_file(memory_uri, ctx=ctx) + except (NotFoundError, FileNotFoundError) as exc: + raise NotFoundError(memory_uri, "memory") from exc + mf = MemoryFileUtils.read(raw, uri=memory_uri) + if content_references_resource(mf.content, resource_uri, recursive=recursive): + raise RuntimeError(f"memory content still contains deleted resource URI: {memory_uri}") + for ref in self._coerce_resource_refs(mf.extra_fields.get("resource_refs")): + if self._resource_ref_matches( + ref.get("resource_uri"), + resource_uri, + recursive=recursive, + ): + raise RuntimeError(f"memory still contains resource ref: {memory_uri}") + + @staticmethod + def _coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: + if isinstance(value, list): + return [dict(item) for item in value if isinstance(item, dict)] + if isinstance(value, dict): + return [dict(value)] + return [] + + @staticmethod + def _group_matches_by_memory( + matches: Sequence[_MemoryRefMatch], + ) -> Dict[str, List[_MemoryRefMatch]]: + grouped: Dict[str, List[_MemoryRefMatch]] = {} + for match in matches: + grouped.setdefault(match.memory_uri, []).append(match) + return 
grouped + + @staticmethod + def _resource_ref_matches( + ref_uri: Any, + target_uri: str, + recursive: bool, + ) -> bool: + return resource_ref_matches(ref_uri, target_uri, recursive=recursive) diff --git a/openviking/service/resource_service.py b/openviking/service/resource_service.py index ff12e241f6..81db4ee3bb 100644 --- a/openviking/service/resource_service.py +++ b/openviking/service/resource_service.py @@ -58,6 +58,7 @@ if TYPE_CHECKING: from openviking.resource.watch_manager import WatchManager from openviking.resource.watch_scheduler import WatchScheduler + from openviking.service.resource_memory_link_service import ResourceMemoryLinkService logger = get_logger(__name__) @@ -118,12 +119,14 @@ def __init__( resource_processor: Optional[ResourceProcessor] = None, skill_processor: Optional[SkillProcessor] = None, watch_scheduler: Optional["WatchScheduler"] = None, + resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ): self._vikingdb = vikingdb self._viking_fs = viking_fs self._resource_processor = resource_processor self._skill_processor = skill_processor self._watch_scheduler = watch_scheduler + self._resource_memory_link_service = resource_memory_link_service self._background_tasks: set[asyncio.Task[Any]] = set() def set_dependencies( @@ -133,6 +136,7 @@ def set_dependencies( resource_processor: ResourceProcessor, skill_processor: SkillProcessor, watch_scheduler: Optional["WatchScheduler"] = None, + resource_memory_link_service: Optional["ResourceMemoryLinkService"] = None, ) -> None: """Set dependencies (for deferred initialization).""" self._vikingdb = vikingdb @@ -140,6 +144,7 @@ def set_dependencies( self._resource_processor = resource_processor self._skill_processor = skill_processor self._watch_scheduler = watch_scheduler + self._resource_memory_link_service = resource_memory_link_service def _get_watch_manager(self) -> Optional["WatchManager"]: if not self._watch_scheduler: @@ -718,6 +723,14 @@ async def add_resource( 
logger.warning( f"[ResourceService] Failed to cancel watch task for {target.to}: {e}" ) + if wait: + await self._link_resource_reason_memory( + result=result, + ctx=ctx, + reason=reason, + source_name=kwargs.get("source_name"), + timeout=timeout, + ) if not wait: from openviking.service.task_tracker import get_task_tracker @@ -733,25 +746,33 @@ async def add_resource( if telemetry_id: monitor_started = True background = asyncio.create_task( - self._monitor_queue_processing( + self._monitor_resource_queue_then_link_memory( task.task_id, telemetry_id, - ctx.account_id, - ctx.user.user_id, + ctx, + root_uri=root_uri, + reason=reason, + source_name=kwargs.get("source_name"), + timeout=timeout, ) ) self._background_tasks.add(background) background.add_done_callback(self._background_tasks.discard) else: - await task_tracker.start( - task.task_id, account_id=ctx.account_id, user_id=ctx.user.user_id - ) - await task_tracker.complete( - task.task_id, - {"root_uri": root_uri}, - account_id=ctx.account_id, - user_id=ctx.user.user_id, + monitor_started = True + background = asyncio.create_task( + self._monitor_resource_queue_then_link_memory( + task.task_id, + None, + ctx, + root_uri=root_uri, + reason=reason, + source_name=kwargs.get("source_name"), + timeout=timeout, + ) ) + self._background_tasks.add(background) + background.add_done_callback(self._background_tasks.discard) return result except Exception as exc: telemetry.set_error( @@ -769,6 +790,95 @@ async def add_resource( get_request_wait_tracker().cleanup(telemetry_id) unregister_wait_telemetry(telemetry_id) + async def _link_resource_reason_memory( + self, + *, + result: Dict[str, Any], + ctx: RequestContext, + reason: str, + source_name: Optional[str], + timeout: Optional[float] = None, + ) -> None: + if not self._resource_memory_link_service: + return + if not (reason or "").strip(): + return + root_uri = result.get("root_uri") + if not root_uri: + return + try: + link_result = await 
self._resource_memory_link_service.on_resource_added( + ctx=ctx, + resource_uri=root_uri, + reason=reason, + source_name=source_name, + timeout=timeout, + ) + result["memory_linking"] = link_result + except Exception as exc: + logger.warning("[ResourceService] Failed to link resource reason memory: %s", exc) + result.setdefault("warnings", []).append(f"Memory linking failed: {exc}") + + async def _monitor_resource_queue_then_link_memory( + self, + task_id: str, + telemetry_id: Optional[str], + ctx: RequestContext, + *, + root_uri: str, + reason: str, + source_name: Optional[str], + timeout: Optional[float], + ) -> None: + from openviking.service.task_tracker import get_task_tracker + + task_tracker = get_task_tracker() + request_wait_tracker = get_request_wait_tracker() + await task_tracker.start(task_id, account_id=ctx.account_id, user_id=ctx.user.user_id) + try: + if telemetry_id: + await request_wait_tracker.wait_for_request(telemetry_id) + status = request_wait_tracker.build_queue_status(telemetry_id) + else: + status = build_queue_status_payload( + await get_queue_manager().wait_complete(timeout=timeout) + ) + errors = sum(int(group.get("error_count", 0) or 0) for group in status.values()) + if errors: + await task_tracker.fail( + task_id, + f"queue processing failed: {status}", + account_id=ctx.account_id, + user_id=ctx.user.user_id, + ) + return + + result: Dict[str, Any] = {"root_uri": root_uri, "queue_status": status} + await self._link_resource_reason_memory( + result=result, + ctx=ctx, + reason=reason, + source_name=source_name, + timeout=timeout, + ) + await task_tracker.complete( + task_id, + result, + account_id=ctx.account_id, + user_id=ctx.user.user_id, + ) + except Exception as exc: + await task_tracker.fail( + task_id, + str(exc), + account_id=ctx.account_id, + user_id=ctx.user.user_id, + ) + finally: + if telemetry_id: + request_wait_tracker.cleanup(telemetry_id) + unregister_wait_telemetry(telemetry_id) + async def _monitor_queue_processing( 
self, task_id: str, diff --git a/openviking/session/memory/memory_updater.py b/openviking/session/memory/memory_updater.py index 7880344e3c..b3de674d09 100644 --- a/openviking/session/memory/memory_updater.py +++ b/openviking/session/memory/memory_updater.py @@ -29,6 +29,10 @@ from openviking.session.memory.merge_op import MergeOpFactory from openviking.session.memory.page_id_map import PageIdMap from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking.session.memory.utils.resource_refs import ( + RESOURCE_REF_SOURCE_SESSION_COMMIT, + sync_memory_resource_refs, +) from openviking.session.memory.utils.template_utils import TemplateUtils from openviking.session.memory.utils.uri import render_template from openviking.storage.viking_fs import get_viking_fs @@ -36,12 +40,20 @@ from openviking.telemetry.request_wait_tracker import get_request_wait_tracker from openviking.utils.time_utils import parse_iso_datetime from openviking_cli.exceptions import NotFoundError -from openviking_cli.utils import get_logger +from openviking_cli.utils import VikingURI, get_logger logger = get_logger(__name__) _EXTRACTION_CHUNK_MIN_CHARS = 100 _EXTRACTION_CHUNK_BOUNDARY_RE = re.compile(r"(\n+|[。!?;!?;]+|(? 
str: + """Return a user-readable event body for add-resource derived events.""" + if not ranges_str: + return "" + additions = self._resource_additions_from_ranges(ranges_str) + if not additions: + return "" + addition = additions[0] + resource_uri = addition.get("Resource URI", "") + if not resource_uri: + return "" + return self._link_resource_summary(summary or "", resource_uri, addition).strip() + + def _resource_additions_from_ranges(self, ranges_str: str) -> List[Dict[str, str]]: + msg_range = self.read_message_ranges(ranges_str) + additions: List[Dict[str, str]] = [] + for msg_group in msg_range.elements: + for msg in msg_group: + text = self._message_text(msg) + if "## Resource Addition" not in text: + continue + fields = { + match.group(1): match.group(2).strip() + for match in _RESOURCE_ADDITION_FIELD_RE.finditer(text) + } + if fields.get("Resource URI"): + additions.append(fields) + return additions + + @staticmethod + def _message_text(message: Message) -> str: + parts = getattr(message, "parts", []) + texts = [part.text for part in parts if isinstance(part, TextPart) and part.text] + if texts: + return "\n".join(texts) + return message.content or "" + + @classmethod + def _link_resource_summary( + cls, + summary: str, + resource_uri: str, + addition: Dict[str, str], + ) -> str: + text = (summary or "").strip() + if not text: + return cls._resource_addition_fallback_sentence(resource_uri, addition) + if f"]({resource_uri})" in text: + return text + if resource_uri in text: + return cls._replace_bare_resource_uri(text, resource_uri, addition) + label = cls._resource_label_from_addition(addition) + return cls._finish_sentence(f"{text.rstrip('。.!')},关联资源为[{label}]({resource_uri})") + + @classmethod + def _replace_bare_resource_uri( + cls, + text: str, + resource_uri: str, + addition: Dict[str, str], + ) -> str: + uri_start = text.find(resource_uri) + if uri_start < 0: + return text + prefix = text[:uri_start] + suffix = text[uri_start + len(resource_uri) 
:] + marker = _RESOURCE_URI_MARKER_RE.search(prefix) + if marker: + visible_prefix = prefix[: marker.start()].rstrip(",,;;:: ") + label = cls._resource_clause_from_summary_prefix(visible_prefix) + if not label: + label = cls._resource_label_from_addition(addition) + if label and visible_prefix.endswith(label): + visible_prefix = visible_prefix[: -len(label)] + f"[{label}]({resource_uri})" + else: + visible_prefix = f"{visible_prefix}[{label}]({resource_uri})" + return cls._finish_sentence(visible_prefix) + + label = cls._resource_label_from_addition(addition) + return cls._finish_sentence(f"{prefix.rstrip()}[{label}]({resource_uri}){suffix.strip()}") + + @staticmethod + def _resource_clause_from_summary_prefix(prefix: str) -> str: + text = prefix.strip(",,;;:: ") + tail = re.split(r"[,,;;。.!??]", text)[-1].strip() + return tail if 0 < len(tail) <= 120 else "" + + @classmethod + def _resource_label_from_addition(cls, addition: Dict[str, str]) -> str: + reason = addition.get("User reason", "").strip() + for prefix in ("这是一张", "这是一个", "该资源是", "这个是", "这是"): + if reason.startswith(prefix): + reason = reason[len(prefix) :].strip() + break + reason = reason.strip("。.!! ") + if reason: + return reason[:80] + source_name = addition.get("Source name", "").strip() + return source_name or "相关资源" + + @classmethod + def _resource_addition_fallback_sentence( + cls, + resource_uri: str, + addition: Dict[str, str], + ) -> str: + label = cls._resource_label_from_addition(addition) + return f"用户保存了[{label}]({resource_uri})。" + + @staticmethod + def _finish_sentence(text: str) -> str: + text = text.strip(",,;;:: ") + if text.endswith(("。", ".", "!", "!", "?", "?")): + return text + return text + "。" + def read_message_ranges(self, ranges_str: str) -> "MessageRange": """Parse ranges string like "0-10,50-60" or "7,9,11,13" and return combined MessageRange. 
@@ -503,6 +631,70 @@ def _get_viking_fs(self): self._viking_fs = get_viking_fs() return self._viking_fs + @classmethod + async def refresh_schema_overview( + cls, + *, + viking_fs: Any, + directory_uri: str, + ctx: RequestContext, + ) -> None: + memory_type = cls.memory_type_from_uri(directory_uri) + if not memory_type: + return + try: + from openviking.session.memory.memory_type_registry import create_default_registry + + updater = cls(registry=create_default_registry()) + updater._viking_fs = viking_fs + await updater.generate_overview(memory_type, directory_uri, ctx) + except Exception: + logger.warning( + "Failed to refresh memory overview for %s", + directory_uri, + exc_info=True, + ) + + @classmethod + async def refresh_file_embedding( + cls, + *, + viking_fs: Any, + vikingdb: Any, + uri: str, + memory_type: Optional[str], + ctx: RequestContext, + ) -> bool: + if not vikingdb or not bool(getattr(vikingdb, "has_queue_manager", False)): + return False + try: + from openviking.session.memory.memory_type_registry import create_default_registry + + result = MemoryUpdateResult() + result.add_written(uri) + updater = cls(registry=create_default_registry(), vikingdb=vikingdb) + updater._viking_fs = viking_fs + attempted = await updater._vectorize_memories( + result, + ctx, + uri_memory_type_map={uri: memory_type} if memory_type else {}, + ) + return attempted > 0 + except Exception: + logger.warning("Failed to refresh memory embedding for %s", uri, exc_info=True) + return False + + @staticmethod + def memory_type_from_uri(uri: str) -> Optional[str]: + parts = [part for part in VikingURI(uri).full_path.split("/") if part] + try: + memories_idx = parts.index("memories") + except ValueError: + return None + if len(parts) <= memories_idx + 1: + return None + return parts[memories_idx + 1] + @tracer() async def apply_operations( self, @@ -588,6 +780,8 @@ async def apply_operations( tracer.error(f"Failed to delete memory {file_content.uri}", e) 
result.add_error(file_content.uri, e) + await self._sync_resource_refs_for_result(result, ctx) + # Vectorize written and edited memories uri_memory_type_map = {} for op in operations.upsert_operations: @@ -631,6 +825,33 @@ async def apply_operations( return result + async def _sync_resource_refs_for_result( + self, + result: MemoryUpdateResult, + ctx: RequestContext, + ) -> None: + """Synchronize resource refs for memory files touched by session extraction.""" + viking_fs = self._get_viking_fs() + deleted_uris = set(result.deleted_uris) + for uri in dict.fromkeys(result.written_uris + result.edited_uris): + if ( + uri in deleted_uris + or uri.endswith("/.overview.md") + or uri.endswith("/.abstract.md") + ): + continue + try: + raw = await viking_fs.read_file(uri, ctx=ctx) + mf = MemoryFileUtils.read(raw, uri=uri) + changed = sync_memory_resource_refs( + mf, + source=RESOURCE_REF_SOURCE_SESSION_COMMIT, + ) + if changed: + await viking_fs.write_file(uri, MemoryFileUtils.write(mf), ctx=ctx) + except Exception as exc: + logger.warning("Failed to sync resource refs for %s: %s", uri, exc) + async def _apply_upsert( self, resolved_op: ResolvedOperation, ctx: RequestContext, extract_context: Any = None ): @@ -776,8 +997,16 @@ async def _apply_links_to_existing_files( viking_fs = self._get_viking_fs() if not viking_fs: return + from openviking.core.namespace import context_type_for_uri + upserted_uris = set(result.written_uris + result.edited_uris) - skip = upserted_uris | (deleted_uris or set()) + non_memory_endpoints = { + uri + for link in resolved_links + for uri in (link.from_uri, link.to_uri) + if context_type_for_uri(uri) != "memory" + } + skip = upserted_uris | (deleted_uris or set()) | non_memory_endpoints await write_stored_links(resolved_links, ctx, viking_fs, skip_uris=skip) async def _apply_delete(self, uri: str, ctx: RequestContext) -> None: @@ -800,7 +1029,7 @@ async def _vectorize_memories( ctx: RequestContext, extract_context: Any = None, 
uri_memory_type_map: Dict[str, str] = None, - ) -> None: + ) -> int: """Vectorize written and edited memory files. Args: @@ -811,11 +1040,12 @@ async def _vectorize_memories( """ if not self._vikingdb: logger.debug("VikingDB not available, skipping vectorization") - return + return 0 uri_memory_type_map = uri_memory_type_map or {} viking_fs = self._get_viking_fs() request_wait_tracker = get_request_wait_tracker() + attempted_count = 0 # Collect all URIs to vectorize (skip .overview.md and .abstract.md - they are handled separately) # Also skip URIs that were deleted in the same batch @@ -829,7 +1059,7 @@ async def _vectorize_memories( if not uris_to_vectorize: logger.debug("No memory files to vectorize") - return + return 0 for uri in uris_to_vectorize: try: @@ -837,7 +1067,9 @@ async def _vectorize_memories( content = await viking_fs.read_file(uri, ctx=ctx) or "" mf = MemoryFileUtils.read(content, uri=uri) - abstract = mf.plain_content() + from openviking.session.memory.utils.link_renderer import LinkRenderer + + abstract = LinkRenderer.strip_all_links(mf.content or "") embedding_text = abstract memory_type = uri_memory_type_map.get(uri) @@ -894,7 +1126,17 @@ async def _vectorize_memories( request_wait_tracker.register_embedding_root( embedding_msg.telemetry_id, embedding_msg.id ) - enqueued = await self._vikingdb.enqueue_embedding_msg(embedding_msg) + attempted_count += 1 + try: + enqueued = await self._vikingdb.enqueue_embedding_msg(embedding_msg) + except Exception as e: + if embedding_msg.telemetry_id: + request_wait_tracker.mark_embedding_failed( + embedding_msg.telemetry_id, + embedding_msg.id, + str(e), + ) + raise if not enqueued and embedding_msg.telemetry_id: request_wait_tracker.mark_embedding_failed( embedding_msg.telemetry_id, @@ -905,6 +1147,7 @@ async def _vectorize_memories( except Exception as e: tracer.error(f"Failed to vectorize memory {uri}: {e}") + return attempted_count async def generate_overview( self, @@ -950,6 +1193,9 @@ async def 
generate_overview( ): md_files.append(f"{base_uri}/{name}") + except (NotFoundError, FileNotFoundError): + logger.debug("Skip overview generation for deleted directory: %s", directory) + return except Exception as e: tracer.error(f"Failed to list files in {directory}: {e}") return @@ -957,15 +1203,19 @@ async def generate_overview( # If no memory files, delete the .overview.md and the directory if empty if not md_files: overview_path = f"{directory.rstrip('/')}/.overview.md" + can_delete_directory = all( + entry.get("name", "") in {"", ".overview.md"} for entry in entries + ) try: - await viking_fs.delete_file(overview_path, ctx=ctx) + await viking_fs.rm(overview_path, recursive=False, ctx=ctx) except Exception: pass # Try to delete empty directory - try: - await viking_fs.delete_file(directory, ctx=ctx) - except Exception: - pass + if can_delete_directory: + try: + await viking_fs.rm(directory, recursive=True, ctx=ctx) + except Exception: + pass return # Parse each file and collect items @@ -977,11 +1227,18 @@ async def generate_overview( # Extract filename from path filename = file_path.split("/")[-1] + metadata = mf.to_metadata() + self._fill_overview_fallback_fields( + memory_type=memory_type, + directory=directory, + filename=filename, + metadata=metadata, + ) items.append( { "file_name": filename, - "file_content": mf.to_metadata(), + "file_content": metadata, } ) except Exception as e: @@ -1012,3 +1269,20 @@ async def generate_overview( await viking_fs.write_file(overview_path, rendered, ctx=ctx) except Exception as e: tracer.error(f"Failed to write overview {overview_path}: {e}") + + @staticmethod + def _fill_overview_fallback_fields( + *, + memory_type: str, + directory: str, + filename: str, + metadata: Dict[str, Any], + ) -> None: + stem = filename.removesuffix(".md") + parent_name = directory.rstrip("/").split("/")[-1] + if memory_type == "entities": + metadata.setdefault("category", parent_name) + metadata.setdefault("name", stem) + elif memory_type == 
"preferences": + metadata.setdefault("user", parent_name) + metadata.setdefault("topic", stem) diff --git a/openviking/session/memory/session_extract_context_provider.py b/openviking/session/memory/session_extract_context_provider.py index fb464af829..5dbdcb18d0 100644 --- a/openviking/session/memory/session_extract_context_provider.py +++ b/openviking/session/memory/session_extract_context_provider.py @@ -8,6 +8,7 @@ import json import os +import re from typing import TYPE_CHECKING, Any, Dict, List, Optional from openviking.message.part import TextPart, ToolPart @@ -28,6 +29,7 @@ add_tool_call_pair_to_messages, get_tool, ) +from openviking.session.memory.utils.resource_refs import contains_resource_uri from openviking.session.memory.utils.uri import render_template from openviking.session.memory.vision_message_normalizer import ( replace_image_parts_with_descriptions, @@ -47,6 +49,9 @@ _PREFETCH_SEARCH_TEXT_PART_MAX_CHARS = 1000 _PREFETCH_SEARCH_ASSISTANT_TEXT_PART_MAX_CHARS = 500 _PREFETCH_SEARCH_TOOL_FIELD_MAX_CHARS = 500 +_RESOURCE_REASON_LANGUAGE_RE = re.compile( + r"(?im)^\s*(?:User reason|用户说明|用户原因|用户理由)[::]\s*(.+?)\s*$" +) class SessionExtractContextProvider(ExtractContextProvider): @@ -86,8 +91,6 @@ def read_file_contents(self) -> Dict[str, MemoryFile]: def get_conversation_text(self) -> str: """Get the full conversation text for match_text validation.""" - from openviking.message.part import TextPart - text_parts = [] for message in self.messages or []: for part in getattr(message, "parts", []): @@ -134,25 +137,69 @@ def _get_vision_vlm(self): def _detect_language(self) -> str: """检测输出语言""" - from openviking.session.memory.utils import resolve_output_language + from openviking.session.memory.utils import ( + resolve_output_language, + strip_language_detection_noise, + ) user_text_parts = [] all_text_parts = [] for message in self.messages or []: for part in getattr(message, "parts", []): if isinstance(part, TextPart) and part.text: - 
all_text_parts.append(part.text) + text = self._language_signal_text( + part.text, + strip_language_detection_noise=strip_language_detection_noise, + ) + all_text_parts.append(text) if getattr(message, "role", "") == "user": - user_text_parts.append(part.text) + user_text_parts.append(text) text_parts = user_text_parts or all_text_parts return resolve_output_language("\n".join(text_parts)) + @staticmethod + def _language_signal_text(text: str, *, strip_language_detection_noise) -> str: + """Keep user-authored language signal and drop machine-oriented URI noise.""" + reason_lines = [ + match.group(1).strip() + for match in _RESOURCE_REASON_LANGUAGE_RE.finditer(text or "") + if match.group(1).strip() + ] + if reason_lines: + return "\n".join(reason_lines) + return strip_language_detection_noise(text) + def get_output_language(self) -> str: return self._output_language + def _conversation_contains_resource_uri(self) -> bool: + for message in self.messages or []: + content = getattr(message, "content", None) + if content and contains_resource_uri(content): + return True + for part in getattr(message, "parts", []) or []: + text = getattr(part, "text", None) + if text and contains_resource_uri(text): + return True + return False + def instruction(self) -> str: output_language = self._output_language + resource_uri_handling = ( + """ + +## Resource URI Handling +- If the conversation contains a resource URI (`viking://resources/...`, `viking://user/{user_id}/resources/...`, or `viking://user/{user_id}/peers/{peer_id}/resources/...`) and the user says a durable fact, judgment, preference, or event about it, extract that memory into the appropriate normal memory type such as entities, events, or preferences. +- Preserve resource references as markdown links in visible memory content when useful. Example: user said "用户保存了越前龙马照片 viking://resources/images/ryoma" -> write "用户保存了[越前龙马照片](viking://resources/images/ryoma)". 
+- For `## Resource Addition` blocks, use `User reason` as the user's intent and `Resource abstract` only as optional context. Do not copy raw fields such as `Resource URI`, `Added at`, `Resource abstract`, or `User reason` into visible memory content. +- Use descriptive link text such as `[越前龙马照片](viking://resources/...)`; avoid visible wording like `资源URI为` or `Resource URI`. +- If the user already wrote a markdown link to a resource URI, keep the same resource link intent. +- Do NOT claim you inspected, summarized, OCRed, or opened the resource file unless the conversation explicitly provides that fact. +""" + if self._conversation_contains_resource_uri() + else "" + ) goal = f"""You are a memory extraction agent. Your task is to analyze conversations and update memories. ## Workflow @@ -170,6 +217,7 @@ def instruction(self) -> str: ## URI Handling The system automatically generates URIs based on memory_type and fields. Just provide correct memory_type and fields. +{resource_uri_handling} ## Self and Peer Memory When a memory item describes the current user, omit peer_id. 
diff --git a/openviking/session/memory/utils/__init__.py b/openviking/session/memory/utils/__init__.py index 85fe0f557b..2cbb32792e 100644 --- a/openviking/session/memory/utils/__init__.py +++ b/openviking/session/memory/utils/__init__.py @@ -20,6 +20,7 @@ resolve_output_language, resolve_output_language_from_conversation, resolve_with_override, + strip_language_detection_noise, ) from openviking.session.memory.utils.line_numbers import ( add_line_numbers, @@ -47,6 +48,7 @@ "resolve_output_language", "resolve_output_language_from_conversation", "resolve_with_override", + "strip_language_detection_noise", "add_line_numbers", "every_line_has_line_numbers", "extract_start_line_number", diff --git a/openviking/session/memory/utils/language.py b/openviking/session/memory/utils/language.py index 2fa7d3f04f..3830b695de 100644 --- a/openviking/session/memory/utils/language.py +++ b/openviking/session/memory/utils/language.py @@ -21,6 +21,7 @@ _STRONG_DOMINANT_MIN_CHARS = 10 _STRONG_DOMINANT_RATIO = 0.95 _PRIMARY_LANGUAGES = {"zh-CN", "en"} +_URI_LANGUAGE_NOISE_RE = re.compile(r"\b(?:viking|https?)://[^\s<>\]\)\"']+") _LATIN_STOPWORDS = { "en": set( @@ -52,30 +53,68 @@ } _LATIN_HINT_LANGUAGES = {"it", "fr", "es", "de", "pt"} -_LOCALE_LANGUAGE_PREFIXES = dict( - zh="zh-CN", ja="ja", ko="ko", ru="ru", ar="ar", - it="it", fr="fr", es="es", de="de", pt="pt", en="en", - chinese="zh-CN", japanese="ja", korean="ko", russian="ru", arabic="ar", - italian="it", french="fr", spanish="es", german="de", portuguese="pt", english="en", -) +_LOCALE_LANGUAGE_PREFIXES = { + "zh": "zh-CN", + "ja": "ja", + "ko": "ko", + "ru": "ru", + "ar": "ar", + "it": "it", + "fr": "fr", + "es": "es", + "de": "de", + "pt": "pt", + "en": "en", + "chinese": "zh-CN", + "japanese": "ja", + "korean": "ko", + "russian": "ru", + "arabic": "ar", + "italian": "it", + "french": "fr", + "spanish": "es", + "german": "de", + "portuguese": "pt", + "english": "en", +} # Use Timezone as a weak fallback signal. 
_TIMEZONE_LANGUAGE_GROUPS = { "zh-CN": ( - "asia/shanghai", "asia/chongqing", "asia/harbin", "asia/urumqi", - "asia/hong_kong", "asia/macau", "asia/taipei", "prc", "roc", "hongkong", - "china standard time", "taipei standard time", + "asia/shanghai", + "asia/chongqing", + "asia/harbin", + "asia/urumqi", + "asia/hong_kong", + "asia/macau", + "asia/taipei", + "prc", + "roc", + "hongkong", + "china standard time", + "taipei standard time", ), "ja": ("asia/tokyo", "japan", "tokyo standard time"), "ko": ("asia/seoul", "rok", "korea standard time"), "ru": ( - "europe/moscow", "europe/kaliningrad", "asia/yekaterinburg", "asia/vladivostok", + "europe/moscow", + "europe/kaliningrad", + "asia/yekaterinburg", + "asia/vladivostok", "russian standard time", ), "ar": ( - "asia/riyadh", "asia/dubai", "asia/qatar", "asia/kuwait", - "asia/baghdad", "africa/cairo", "africa/algiers", "africa/tunis", - "arab standard time", "arabian standard time", "egypt standard time", + "asia/riyadh", + "asia/dubai", + "asia/qatar", + "asia/kuwait", + "asia/baghdad", + "africa/cairo", + "africa/algiers", + "africa/tunis", + "arab standard time", + "arabian standard time", + "egypt standard time", ), "it": ("europe/rome",), "fr": ("europe/paris",), @@ -83,13 +122,34 @@ "de": ("europe/berlin",), "pt": ("europe/lisbon", "america/sao_paulo"), "en": ( - "america/new_york", "america/chicago", "america/denver", "america/los_angeles", - "america/phoenix", "america/anchorage", "pacific/honolulu", "us/eastern", - "us/central", "us/mountain", "us/pacific", "europe/london", "europe/dublin", - "gb", "gb-eire", "america/toronto", "america/vancouver", "canada/eastern", - "canada/pacific", "australia/sydney", "australia/melbourne", - "australia/brisbane", "australia/perth", "pacific/auckland", "nz", - "eastern standard time", "pacific standard time", "gmt standard time", + "america/new_york", + "america/chicago", + "america/denver", + "america/los_angeles", + "america/phoenix", + "america/anchorage", + 
"pacific/honolulu", + "us/eastern", + "us/central", + "us/mountain", + "us/pacific", + "europe/london", + "europe/dublin", + "gb", + "gb-eire", + "america/toronto", + "america/vancouver", + "canada/eastern", + "canada/pacific", + "australia/sydney", + "australia/melbourne", + "australia/brisbane", + "australia/perth", + "pacific/auckland", + "nz", + "eastern standard time", + "pacific standard time", + "gmt standard time", ), } @@ -109,7 +169,11 @@ def _language_allowed_by_fallback(language: str, fallback_language: str) -> bool def _is_strong_dominant(count: int, total: int) -> bool: - return count >= _STRONG_DOMINANT_MIN_CHARS and total > 0 and count / total >= _STRONG_DOMINANT_RATIO + return ( + count >= _STRONG_DOMINANT_MIN_CHARS + and total > 0 + and count / total >= _STRONG_DOMINANT_RATIO + ) def _language_from_locale_value(value: str) -> str: @@ -231,6 +295,7 @@ def _detect_latin_language(text: str, fallback_language: str) -> str: def _detect_language_from_text(user_text: str, fallback_language: str) -> str: """Internal shared helper to detect dominant language from text.""" fallback = (fallback_language or "en").strip() or "en" + user_text = strip_language_detection_noise(user_text) if not user_text: return fallback @@ -291,6 +356,11 @@ def resolve_with_override(config, detect: Callable[[], str]) -> str: return detect() +def strip_language_detection_noise(text: str) -> str: + """Remove URI-like machine tokens that should not affect output language.""" + return _URI_LANGUAGE_NOISE_RE.sub(" ", text or "") + + def resolve_output_language(text: str, config=None) -> str: """Resolve output language from text, honoring config override before detection.""" fallback = _resolve_system_fallback_language("en") diff --git a/openviking/session/memory/utils/link_renderer.py b/openviking/session/memory/utils/link_renderer.py index d0d64c5670..55d2a0d012 100644 --- a/openviking/session/memory/utils/link_renderer.py +++ b/openviking/session/memory/utils/link_renderer.py @@ 
-105,6 +105,12 @@ def _replace_link(m: re.Match) -> str: return LinkRenderer._RELATIVE_LINK_RE.sub(_replace_link, content) + @staticmethod + def strip_all_links(content: str) -> str: + """Remove markdown links regardless of target scheme, keeping only link text.""" + + return LinkRenderer._RELATIVE_LINK_RE.sub(lambda m: m.group("text"), content) + @staticmethod def relative_path(source_uri: str, target_uri: str) -> Optional[str]: """Compute a relative path from source_uri to target_uri in the viking:// namespace. @@ -122,7 +128,7 @@ def relative_path(source_uri: str, target_uri: str) -> Optional[str]: return None common = 0 - for s, t in zip(src, tgt): + for s, t in zip(src, tgt, strict=False): if s == t: common += 1 else: diff --git a/openviking/session/memory/utils/resource_refs.py b/openviking/session/memory/utils/resource_refs.py new file mode 100644 index 0000000000..d80e0d8eec --- /dev/null +++ b/openviking/session/memory/utils/resource_refs.py @@ -0,0 +1,449 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""Helpers for resource references embedded in memory content.""" + +from __future__ import annotations + +import re +from datetime import datetime, timezone +from typing import Any, Dict, Iterable, List, Optional, Sequence + +from openviking.session.memory.dataclass import MemoryFile + +RESOURCE_REF_SOURCE_CONTENT_WRITE = "content.write" +RESOURCE_REF_SOURCE_SESSION_COMMIT = "session.commit" + +_RESOURCE_URI_PATH_CHARS = r"[^\s<>\]\)\"',。;:!?、,;:!?)】》]+" +_RESOURCE_URI_BOUNDARY = r"(?=$|[\s<>\]\)\"',。;:!?、,;:!?.)】》])" +_RESOURCE_URI_PATTERN = ( + r"viking://(?:" + rf"resources(?:/{_RESOURCE_URI_PATH_CHARS})?" + r"|user/[^/\s<>\]\)\"']+/(?:" + rf"resources(?:/{_RESOURCE_URI_PATH_CHARS})?" + rf"|peers/[^/\s<>\]\)\"']+/resources(?:/{_RESOURCE_URI_PATH_CHARS})?" 
+ r")" + r")" + rf"{_RESOURCE_URI_BOUNDARY}" +) +_MARKDOWN_RESOURCE_LINK_RE = re.compile(rf"\[([^\]\n]+)\]\(({_RESOURCE_URI_PATTERN})\)") +_RESOURCE_URI_RE = re.compile(_RESOURCE_URI_PATTERN) +_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL) +_INLINE_CODE_RE = re.compile(r"`[^`\n]+`") +_TRAILING_URI_PUNCTUATION = ".,;:!?,。;:!?、)】》" +_SENTENCE_BOUNDARIES = "。!?.!?\n" +_MAX_LINKIFIED_SENTENCE_CHARS = 160 +_RESOURCE_CLEANUP_ARTIFACT_LINE_RE = re.compile( + r"(?m)^(?:None ChatLog:|Original reason:\s*|Memory URI:\s*viking://user/[^\n]*)\n?" +) +_RESOURCE_URI_MARKER_RE = re.compile( + r"[,,;;::\s]*(?:资源\s*URI\s*为|资源\s*URI|Resource\s+URI)\s*[::为]?\s*$", + re.IGNORECASE, +) + + +def sync_memory_resource_refs( + mf: MemoryFile, + *, + source: str, + reason: Optional[str] = None, + created_at: Optional[str] = None, +) -> bool: + """Link visible resource URIs and keep MEMORY_FIELDS.resource_refs in sync.""" + before_content = mf.content + before_refs = _coerce_resource_refs(mf.extra_fields.get("resource_refs")) + + code_spans = _protected_code_spans(mf.content) + markdown_refs, markdown_spans = _extract_markdown_resource_refs( + mf.content, + code_spans, + ) + mf.content, bare_refs = _linkify_bare_resource_uris( + mf.content, + code_spans + markdown_spans, + ) + _merge_resource_refs( + mf, + markdown_refs + bare_refs, + source=source, + reason=reason, + created_at=created_at, + ) + + after_refs = _coerce_resource_refs(mf.extra_fields.get("resource_refs")) + return before_content != mf.content or before_refs != after_refs + + +def coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: + return _coerce_resource_refs(value) + + +def contains_resource_uri(content: str) -> bool: + """Return whether text contains any supported resource URI form.""" + return bool(_RESOURCE_URI_RE.search(content or "")) + + +def content_references_resource( + content: str, + resource_uri: str, + *, + recursive: bool = False, +) -> bool: + """Return whether visible memory content references a 
resource URI.""" + return any( + resource_ref_matches(uri, resource_uri, recursive=recursive) + for uri in extract_resource_uris(content) + ) + + +def extract_resource_uris(content: str) -> List[str]: + """Extract visible resource URIs from markdown links or bare URI text.""" + uris: List[str] = [] + for match in _MARKDOWN_RESOURCE_LINK_RE.finditer(content or ""): + uri = _trim_resource_uri(match.group(2).strip()) + if uri: + uris.append(uri) + for match in _RESOURCE_URI_RE.finditer(content or ""): + uri = _trim_resource_uri(match.group(0)) + if uri: + uris.append(uri) + return list(dict.fromkeys(uris)) + + +def remove_resource_references_from_memory( + mf: MemoryFile, + resource_uri: str, + *, + recursive: bool = False, +) -> bool: + """Remove visible references and MEMORY_FIELDS.resource_refs for one resource.""" + before_content = mf.content + before_refs = _coerce_resource_refs(mf.extra_fields.get("resource_refs")) + + mf.content = remove_resource_references_from_content( + mf.content, + resource_uri, + recursive=recursive, + ) + refs = [ + ref + for ref in before_refs + if not resource_ref_matches(ref.get("resource_uri"), resource_uri, recursive=recursive) + ] + if refs: + mf.extra_fields["resource_refs"] = refs + else: + mf.extra_fields.pop("resource_refs", None) + + return before_content != mf.content or before_refs != refs + + +def remove_resource_references_from_content( + content: str, + resource_uri: str, + *, + recursive: bool = False, +) -> str: + """Remove sentences/list lines that contain matching resource URI references.""" + text = content or "" + spans = _matching_resource_reference_spans(text, resource_uri, recursive=recursive) + if not spans: + return text + + sentence_spans = _merge_spans( + _expand_to_sentence_span(text, start, end) for start, end in spans + ) + for start, end in reversed(sentence_spans): + text = text[:start] + text[end:] + return _normalize_removed_reference_text(text) + + +def resource_ref_matches( + ref_uri: Any, + 
target_uri: str, + *, + recursive: bool, +) -> bool: + if not isinstance(ref_uri, str) or not ref_uri: + return False + normalized_ref = _trim_resource_uri(ref_uri).rstrip("/") + normalized_target = _trim_resource_uri(target_uri).rstrip("/") + if normalized_ref == normalized_target: + return True + return recursive and normalized_ref.startswith(normalized_target + "/") + + +def _protected_code_spans(content: str) -> List[tuple[int, int]]: + spans = [(match.start(), match.end()) for match in _CODE_BLOCK_RE.finditer(content or "")] + spans.extend((match.start(), match.end()) for match in _INLINE_CODE_RE.finditer(content or "")) + return spans + + +def _extract_markdown_resource_refs( + content: str, + protected_spans: Sequence[tuple[int, int]], +) -> tuple[List[Dict[str, Any]], List[tuple[int, int]]]: + refs: List[Dict[str, Any]] = [] + link_spans: List[tuple[int, int]] = [] + for match in _MARKDOWN_RESOURCE_LINK_RE.finditer(content or ""): + if _overlaps_spans(match.start(), match.end(), protected_spans): + continue + label = match.group(1).strip() + resource_uri = _trim_resource_uri(match.group(2).strip()) + link_spans.append((match.start(), match.end())) + refs.append( + { + "resource_uri": resource_uri, + "match_text": label or None, + } + ) + return refs, link_spans + + +def _linkify_bare_resource_uris( + content: str, + protected_spans: Sequence[tuple[int, int]], +) -> tuple[str, List[Dict[str, Any]]]: + refs: List[Dict[str, Any]] = [] + updated = content or "" + covered_start = len(updated) + 1 + + matches = list(_RESOURCE_URI_RE.finditer(updated)) + for match in reversed(matches): + resource_uri = _trim_resource_uri(match.group(0)) + if not resource_uri: + continue + start = match.start() + end = start + len(resource_uri) + if _overlaps_spans(start, end, protected_spans): + continue + + refs.append({"resource_uri": resource_uri}) + sentence_span = _previous_sentence_span(updated, start) + if not sentence_span: + continue + sentence_start, sentence_end = 
sentence_span + if end > covered_start: + continue + anchor_start = sentence_start + anchor_end = sentence_end + anchor = updated[anchor_start:anchor_end] + marker_span = _resource_uri_marker_span(anchor) + if marker_span: + label_span = _resource_clause_span_before_marker( + updated, + sentence_start, + sentence_start + marker_span[0], + ) + if label_span: + anchor_start, anchor_end = label_span + anchor = updated[anchor_start:anchor_end] + if contains_resource_uri(anchor) or "](" in anchor: + continue + refs[-1]["match_text"] = anchor + replacement = f"[{anchor}]({resource_uri})" + updated = updated[:anchor_start] + replacement + updated[end:] + covered_start = anchor_start + + refs.reverse() + return updated, refs + + +def _previous_sentence_span(content: str, uri_start: int) -> Optional[tuple[int, int]]: + sentence_end = uri_start + while sentence_end > 0 and content[sentence_end - 1].isspace(): + sentence_end -= 1 + if sentence_end <= 0: + return None + + boundary_search_end = sentence_end + if content[sentence_end - 1] in _SENTENCE_BOUNDARIES: + boundary_search_end = sentence_end - 1 + sentence_start = 0 + for idx in range(boundary_search_end - 1, -1, -1): + if content[idx] in _SENTENCE_BOUNDARIES: + sentence_start = idx + 1 + break + while sentence_start < sentence_end and content[sentence_start].isspace(): + sentence_start += 1 + + anchor = content[sentence_start:sentence_end] + if not anchor or len(anchor) > _MAX_LINKIFIED_SENTENCE_CHARS: + return None + return sentence_start, sentence_end + + +def _merge_resource_refs( + mf: MemoryFile, + refs: Sequence[Dict[str, Any]], + *, + source: str, + reason: Optional[str], + created_at: Optional[str], +) -> None: + visible_refs: Dict[str, Dict[str, Any]] = {} + for ref in refs: + resource_uri = ref.get("resource_uri") + if not isinstance(resource_uri, str) or not resource_uri: + continue + existing = visible_refs.setdefault(resource_uri, {"resource_uri": resource_uri}) + match_text = ref.get("match_text") + if 
match_text and not existing.get("match_text"): + existing["match_text"] = match_text + + existing_refs = _coerce_resource_refs(mf.extra_fields.get("resource_refs")) + merged: List[Dict[str, Any]] = [] + seen_resource_uris: set[str] = set() + ref_created_at = created_at or datetime.now(timezone.utc).isoformat() + + for existing_ref in existing_refs: + resource_uri = existing_ref.get("resource_uri") + if not isinstance(resource_uri, str) or not resource_uri: + merged.append(existing_ref) + continue + + visible_ref = visible_refs.get(resource_uri) + if existing_ref.get("source") == source and visible_ref is None: + continue + + if visible_ref and existing_ref.get("source") == source: + if visible_ref.get("match_text"): + existing_ref["match_text"] = visible_ref["match_text"] + existing_ref.setdefault("created_at", ref_created_at) + if reason: + existing_ref.setdefault("reason", reason) + + merged.append(existing_ref) + seen_resource_uris.add(resource_uri) + + for resource_uri, visible_ref in visible_refs.items(): + if resource_uri in seen_resource_uris: + continue + ref = { + "resource_uri": resource_uri, + "source": source, + "created_at": ref_created_at, + } + if reason: + ref["reason"] = reason + if visible_ref.get("match_text"): + ref["match_text"] = visible_ref["match_text"] + merged.append(ref) + + if merged: + mf.extra_fields["resource_refs"] = merged + else: + mf.extra_fields.pop("resource_refs", None) + + +def _coerce_resource_refs(value: Any) -> List[Dict[str, Any]]: + if isinstance(value, list): + return [dict(item) for item in value if isinstance(item, dict)] + if isinstance(value, dict): + return [dict(value)] + return [] + + +def _trim_resource_uri(resource_uri: str) -> str: + return (resource_uri or "").rstrip(_TRAILING_URI_PUNCTUATION) + + +def _matching_resource_reference_spans( + content: str, + resource_uri: str, + *, + recursive: bool, +) -> List[tuple[int, int]]: + spans: List[tuple[int, int]] = [] + markdown_spans: List[tuple[int, int]] = [] + 
for match in _MARKDOWN_RESOURCE_LINK_RE.finditer(content or ""): + markdown_spans.append((match.start(), match.end())) + if resource_ref_matches(match.group(2), resource_uri, recursive=recursive): + spans.append((match.start(), match.end())) + + for match in _RESOURCE_URI_RE.finditer(content or ""): + resource_end = match.start() + len(_trim_resource_uri(match.group(0))) + if _overlaps_spans(match.start(), resource_end, markdown_spans): + continue + if resource_ref_matches(match.group(0), resource_uri, recursive=recursive): + spans.append((match.start(), resource_end)) + return spans + + +def _resource_uri_marker_span(anchor: str) -> Optional[tuple[int, int]]: + match = _RESOURCE_URI_MARKER_RE.search(anchor) + if not match: + return None + return match.start(), match.end() + + +def _resource_clause_span_before_marker( + content: str, + sentence_start: int, + marker_start: int, +) -> Optional[tuple[int, int]]: + prefix = content[sentence_start:marker_start].rstrip(",,;;:: ") + if not prefix: + return None + + pieces = list(re.finditer(r"[^,,;;。.!??]+$", prefix)) + if not pieces: + return None + label_start = sentence_start + pieces[-1].start() + label_end = sentence_start + pieces[-1].end() + if _valid_resource_clause(content[label_start:label_end]): + return label_start, label_end + return None + + +def _valid_resource_clause(clause: str) -> bool: + clause = clause.strip() + return bool(clause) and len(clause) <= 120 and "\n" not in clause and "](" not in clause + + +def _expand_to_sentence_span(content: str, start: int, end: int) -> tuple[int, int]: + span_start = start + for idx in range(start - 1, -1, -1): + if content[idx] in _SENTENCE_BOUNDARIES: + span_start = idx + 1 + break + else: + span_start = 0 + + span_end = end + for idx in range(end, len(content)): + if content[idx] in _SENTENCE_BOUNDARIES: + span_end = idx + 1 + break + else: + span_end = len(content) + + while span_start < span_end and content[span_start].isspace(): + span_start += 1 + while 
span_end < len(content) and content[span_end].isspace(): + span_end += 1 + return span_start, span_end + + +def _merge_spans(spans: Iterable[tuple[int, int]]) -> List[tuple[int, int]]: + merged: List[tuple[int, int]] = [] + for start, end in sorted(spans): + if not merged or start > merged[-1][1]: + merged.append((start, end)) + else: + merged[-1] = (merged[-1][0], max(merged[-1][1], end)) + return merged + + +def _normalize_removed_reference_text(content: str) -> str: + content = _RESOURCE_CLEANUP_ARTIFACT_LINE_RE.sub("", content) + text = re.sub(r"[ \t]+([,。;:!?,.!?;:])", r"\1", content) + text = re.sub(r"\n{3,}", "\n\n", text) + return text.strip() + + +def _overlaps_spans( + start: int, + end: int, + protected_spans: Sequence[tuple[int, int]], +) -> bool: + return any(start < span_end and end > span_start for span_start, span_end in protected_spans) diff --git a/openviking/storage/content_write.py b/openviking/storage/content_write.py index 08ef0fedf9..33cb8cf73c 100644 --- a/openviking/storage/content_write.py +++ b/openviking/storage/content_write.py @@ -10,7 +10,12 @@ from openviking.core.namespace import NamespaceShapeError, canonicalize_uri, context_type_for_uri from openviking.resource.watch_storage import is_watch_task_control_uri from openviking.server.identity import RequestContext +from openviking.session.memory.memory_updater import MemoryUpdater from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking.session.memory.utils.resource_refs import ( + RESOURCE_REF_SOURCE_CONTENT_WRITE, + sync_memory_resource_refs, +) from openviking.storage.queuefs import SemanticMsg, get_queue_manager from openviking.storage.queuefs.semantic_msg import build_semantic_coalesce_key from openviking.storage.transaction import get_lock_manager @@ -38,8 +43,9 @@ class ContentWriteCoordinator: """Write a file (create or modify) and trigger downstream maintenance.""" - def __init__(self, viking_fs: VikingFS): + def __init__(self, viking_fs: 
VikingFS, vikingdb: Any = None): self._viking_fs = viking_fs + self._vikingdb = vikingdb async def write( self, @@ -113,12 +119,16 @@ def _build_write_result( written_bytes: int, wait: bool, queue_status: Optional[Dict[str, Any]], + semantic_status: Optional[str] = None, + vector_status: Optional[str] = None, + overview_status: Optional[str] = None, ) -> Dict[str, Any]: - semantic_status, vector_status = self._refresh_statuses( - wait=wait, - queue_status=queue_status, - ) - return { + if semantic_status is None or vector_status is None: + semantic_status, vector_status = self._refresh_statuses( + wait=wait, + queue_status=queue_status, + ) + result = { "uri": uri, "root_uri": root_uri, "context_type": context_type, @@ -129,6 +139,9 @@ def _build_write_result( "vector_status": vector_status, "queue_status": queue_status, } + if overview_status is not None: + result["overview_status"] = overview_status + return result def _refresh_statuses( self, @@ -345,12 +358,19 @@ async def _write_in_place( mode: str, ctx: RequestContext, ) -> None: - if mode == "replace" and context_type_for_uri(uri) == "memory": - existing_raw = await self._viking_fs.read_file(uri, ctx=ctx) - mf = MemoryFileUtils.read(existing_raw, uri=uri) - mf.content = content - content = MemoryFileUtils.write(mf) - await self._viking_fs.write_file(uri, content, ctx=ctx) + if context_type_for_uri(uri) == "memory": + if mode == "replace": + existing_raw = await self._viking_fs.read_file(uri, ctx=ctx) + mf = MemoryFileUtils.read(existing_raw, uri=uri) + mf.content = content + elif mode == "append": + existing_raw = await self._viking_fs.read_file(uri, ctx=ctx) + mf = MemoryFileUtils.read(existing_raw, uri=uri) + mf.content = mf.content + content + else: + mf = MemoryFileUtils.read(content, uri=uri) + sync_memory_resource_refs(mf, source=RESOURCE_REF_SOURCE_CONTENT_WRITE) + await self._viking_fs.write_file(uri, MemoryFileUtils.write(mf), ctx=ctx) return if mode == "append": @@ -407,43 +427,6 @@ async def 
_enqueue_semantic_refresh( get_request_wait_tracker().mark_semantic_failed(msg.telemetry_id, msg.id, str(e)) raise - async def _enqueue_memory_refresh( - self, - *, - root_uri: str, - modified_uri: str, - ctx: RequestContext, - ) -> None: - queue_manager = get_queue_manager() - semantic_queue = queue_manager.get_queue(queue_manager.SEMANTIC, allow_create=True) - telemetry = get_current_telemetry() - msg = SemanticMsg( - uri=root_uri, - context_type="memory", - account_id=ctx.account_id, - user_id=ctx.user.user_id, - peer_id=ctx.user.user_id, - role=ctx.role.value, - skip_vectorization=False, - telemetry_id=telemetry.telemetry_id, - coalesce_key=build_semantic_coalesce_key( - context_type="memory", - uri=root_uri, - account_id=ctx.account_id, - user_id=ctx.user.user_id, - peer_id=ctx.user.user_id, - ), - changes={"modified": [modified_uri]}, - ) - if msg.telemetry_id: - get_request_wait_tracker().register_semantic_root(msg.telemetry_id, msg.id) - try: - await semantic_queue.enqueue(msg) - except Exception as e: - if msg.telemetry_id: - get_request_wait_tracker().mark_semantic_failed(msg.telemetry_id, msg.id, str(e)) - raise - async def _wait_for_queues(self, *, timeout: Optional[float]) -> Dict[str, Any]: queue_manager = get_queue_manager() try: @@ -489,21 +472,37 @@ async def _write_memory_with_refresh( raise InvalidArgumentError(f"resource is busy and cannot be written now: {uri}") released = False + request_registered = False try: - if wait and telemetry_id: - get_request_wait_tracker().register_request(telemetry_id) await self._write_in_place(uri, content, mode=mode, ctx=ctx) - await self._enqueue_memory_refresh( - root_uri=root_uri, - modified_uri=uri, - ctx=ctx, - ) await lock_manager.release(handle) released = True - queue_status = ( - await self._wait_for_request(telemetry_id=telemetry_id, timeout=timeout) - if wait - else None + if wait and telemetry_id and self._vikingdb_has_queue(): + get_request_wait_tracker().register_request(telemetry_id) + 
request_registered = True + await MemoryUpdater.refresh_schema_overview( + viking_fs=self._viking_fs, + directory_uri=root_uri, + ctx=ctx, + ) + embedding_requested = await MemoryUpdater.refresh_file_embedding( + viking_fs=self._viking_fs, + vikingdb=self._vikingdb, + uri=uri, + memory_type=MemoryUpdater.memory_type_from_uri(root_uri), + ctx=ctx, + ) + queue_status = None + if embedding_requested and wait: + queue_status = ( + await self._wait_for_request(telemetry_id=telemetry_id, timeout=timeout) + if telemetry_id + else await self._wait_for_queues(timeout=timeout) + ) + vector_status = self._memory_vector_status( + embedding_requested=embedding_requested, + wait=wait, + queue_status=queue_status, ) return self._build_write_result( uri=uri, @@ -513,15 +512,37 @@ async def _write_memory_with_refresh( written_bytes=written_bytes, wait=wait, queue_status=queue_status, + semantic_status="skipped", + vector_status=vector_status, + overview_status="complete", ) except Exception: if not released: await lock_manager.release(handle) raise finally: - if wait and telemetry_id: + if request_registered: get_request_wait_tracker().cleanup(telemetry_id) + def _vikingdb_has_queue(self) -> bool: + if not self._vikingdb: + return False + return bool(getattr(self._vikingdb, "has_queue_manager", False)) + + def _memory_vector_status( + self, + *, + embedding_requested: bool, + wait: bool, + queue_status: Optional[Dict[str, Any]], + ) -> str: + if not embedding_requested: + return "skipped" + if not wait: + return "queued" + _, vector_status = self._refresh_statuses(wait=True, queue_status=queue_status) + return vector_status + async def _resolve_root_uri( self, uri: str, @@ -549,7 +570,10 @@ async def _resolve_root_uri( raise InvalidArgumentError( f"memory write target must be inside a memory type directory: {uri}" ) - root_uri = VikingURI.build(*parts[: memories_idx + 2]) + parent = VikingURI(uri).parent + if parent is None: + raise InvalidArgumentError(f"could not resolve write 
root for {uri}") + root_uri = parent.uri stat = await self._safe_stat(root_uri, ctx=ctx, allow_not_found=_allow_not_found) if stat.get("not_found") or not stat.get("isDir"): diff --git a/openviking/sync_client.py b/openviking/sync_client.py index 80017c4d78..58e6cfeec0 100644 --- a/openviking/sync_client.py +++ b/openviking/sync_client.py @@ -401,9 +401,15 @@ def relations(self, uri: str) -> List[Dict[str, Any]]: """Get relations""" return run_async(self._async_client.relations(uri)) - def rm(self, uri: str, recursive: bool = False) -> None: + def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: float = None, + ) -> None: """Delete resource""" - return run_async(self._async_client.rm(uri, recursive)) + return run_async(self._async_client.rm(uri, recursive, wait=wait, timeout=timeout)) def wait_processed(self, timeout: float = None) -> Dict[str, Any]: """Wait for all async operations to complete""" diff --git a/openviking_cli/client/base.py b/openviking_cli/client/base.py index 47c8a22564..fef9703721 100644 --- a/openviking_cli/client/base.py +++ b/openviking_cli/client/base.py @@ -114,7 +114,13 @@ async def mkdir(self, uri: str, description: Optional[str] = None) -> None: ... @abstractmethod - async def rm(self, uri: str, recursive: bool = False) -> None: + async def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource.""" ... 
diff --git a/openviking_cli/client/http.py b/openviking_cli/client/http.py index 39c56cfa28..13bee02669 100644 --- a/openviking_cli/client/http.py +++ b/openviking_cli/client/http.py @@ -639,13 +639,22 @@ async def mkdir(self, uri: str, description: Optional[str] = None) -> None: ) self._handle_response(response) - async def rm(self, uri: str, recursive: bool = False) -> None: + async def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource.""" uri = VikingURI.normalize(uri) + params = {"uri": uri, "recursive": recursive, "wait": wait} + if timeout is not None: + params["timeout"] = timeout response = await self._http.request( "DELETE", "/api/v1/fs", - params={"uri": uri, "recursive": recursive}, + params=params, ) self._handle_response(response) diff --git a/openviking_cli/client/sync_http.py b/openviking_cli/client/sync_http.py index 474287bcbb..f7b1b0ff0e 100644 --- a/openviking_cli/client/sync_http.py +++ b/openviking_cli/client/sync_http.py @@ -405,9 +405,15 @@ def mkdir(self, uri: str, description: Optional[str] = None) -> None: """Create directory.""" run_async(self._async_client.mkdir(uri, description=description)) - def rm(self, uri: str, recursive: bool = False) -> None: + def rm( + self, + uri: str, + recursive: bool = False, + wait: bool = False, + timeout: Optional[float] = None, + ) -> None: """Remove resource.""" - run_async(self._async_client.rm(uri, recursive)) + run_async(self._async_client.rm(uri, recursive, wait=wait, timeout=timeout)) def mv(self, from_uri: str, to_uri: str) -> None: """Move resource.""" diff --git a/tests/server/conftest.py b/tests/server/conftest.py index 7208238e4c..18ad2c5de3 100644 --- a/tests/server/conftest.py +++ b/tests/server/conftest.py @@ -53,22 +53,22 @@ def _install_fake_embedder(monkeypatch): """Use an in-process fake embedder so server tests never hit external APIs.""" - dimension = 1024 class FakeEmbedder(DenseEmbedderBase): 
- def __init__(self): + def __init__(self, dimension: int = 2048): super().__init__(model_name="test-fake-embedder") + self._dimension = dimension def embed(self, text: str, is_query: bool = False) -> EmbedResult: - return EmbedResult(dense_vector=[0.1] * dimension) + return EmbedResult(dense_vector=[0.1] * self._dimension) def embed_batch(self, texts: list[str], is_query: bool = False) -> list[EmbedResult]: return [self.embed(text, is_query=is_query) for text in texts] def get_dimension(self) -> int: - return dimension + return self._dimension - monkeypatch.setattr(EmbeddingConfig, "get_embedder", lambda self: FakeEmbedder()) + monkeypatch.setattr(EmbeddingConfig, "get_embedder", lambda self: FakeEmbedder(self.dimension)) return FakeEmbedder diff --git a/tests/server/test_content_write_service.py b/tests/server/test_content_write_service.py index 6a8927c902..ee13babebb 100644 --- a/tests/server/test_content_write_service.py +++ b/tests/server/test_content_write_service.py @@ -35,9 +35,14 @@ async def test_write_updates_memory_file_and_parent_overview(service): ) assert result["context_type"] == "memory" + assert result["semantic_status"] == "skipped" + assert result["vector_status"] == "complete" + assert result["overview_status"] == "complete" + assert result["queue_status"]["Embedding"]["processed"] >= 1 assert await service.viking_fs.read_file(memory_uri, ctx=ctx) == "Updated preference" assert await service.viking_fs.read_file(f"{memory_dir}/.overview.md", ctx=ctx) - assert await service.viking_fs.read_file(f"{memory_dir}/.abstract.md", ctx=ctx) + with pytest.raises(NotFoundError): + await service.viking_fs.read_file(f"{memory_dir}/.abstract.md", ctx=ctx) @pytest.mark.asyncio @@ -119,6 +124,169 @@ async def test_memory_append_preserves_metadata(service): assert stored_result.extra_fields == expected_mf.extra_fields +@pytest.mark.asyncio +async def test_memory_write_adds_resource_refs_for_markdown_resource_link(service): + ctx = 
RequestContext(user=service.user, role=Role.USER) + memory_uri = f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg_1" + content = f"用户上传了一张[越前龙马]({resource_uri})的照片" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write(memory_uri, content=content, ctx=ctx, mode="replace") + + stored = await service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + refs = mf.extra_fields["resource_refs"] + assert mf.content == content + assert refs[0]["resource_uri"] == resource_uri + assert refs[0]["source"] == "content.write" + assert refs[0]["match_text"] == "越前龙马" + assert mf.links == [] + + +@pytest.mark.parametrize( + "resource_uri", + [ + "viking://user/test_user/resources/images/2026/06/10/yueqian_jpeg", + "viking://user/test_user/peers/fuji/resources/images/2026/06/10/yueqian_jpeg", + ], +) +@pytest.mark.asyncio +async def test_memory_write_adds_resource_refs_for_user_scoped_resource_links( + service, + resource_uri, +): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_uri = f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + content = f"用户上传了一张[越前龙马]({resource_uri})的照片" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write(memory_uri, content=content, ctx=ctx, mode="replace") + + stored = await service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + refs = mf.extra_fields["resource_refs"] + assert mf.content == content + assert refs[0]["resource_uri"] == resource_uri + assert refs[0]["source"] == "content.write" + assert refs[0]["match_text"] == "越前龙马" + + +@pytest.mark.asyncio +async def test_memory_write_linkifies_bare_resource_uri_previous_sentence(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_uri = 
f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + resource_uri = "viking://resources/images/2026/06/10/yueqian_jpeg_1" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write( + memory_uri, + content=f"用户上传了一张越前龙马的照片 {resource_uri}", + ctx=ctx, + mode="replace", + ) + + stored = await service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + assert mf.content == f"[用户上传了一张越前龙马的照片]({resource_uri})" + refs = mf.extra_fields["resource_refs"] + assert refs[0]["resource_uri"] == resource_uri + assert refs[0]["source"] == "content.write" + assert refs[0]["match_text"] == "用户上传了一张越前龙马的照片" + assert mf.links == [] + + +@pytest.mark.asyncio +async def test_memory_write_linkifies_resource_uri_marker_with_readable_anchor(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_uri = f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + resource_uri = "viking://resources/images/2026/06/12/yueqian_jpeg" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write( + memory_uri, + content=f"2026-06-12,用户保存了粉丝创作的越前龙马动漫插画资源,资源URI为{resource_uri}。", + ctx=ctx, + mode="replace", + ) + + stored = await service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + assert mf.content == f"2026-06-12,[用户保存了粉丝创作的越前龙马动漫插画资源]({resource_uri})。" + refs = mf.extra_fields["resource_refs"] + assert refs[0]["resource_uri"] == resource_uri + assert refs[0]["source"] == "content.write" + assert refs[0]["match_text"] == "用户保存了粉丝创作的越前龙马动漫插画资源" + assert mf.links == [] + + +@pytest.mark.asyncio +async def test_memory_write_ignores_resource_uri_in_inline_code(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_uri = f"viking://user/{ctx.user.user_space_name()}/memories/entities/ryoma.md" + resource_uri = 
"viking://resources/images/2026/06/10/yueqian_jpeg_1" + content = f"调试示例:`{resource_uri}`" + await service.viking_fs.write_file(memory_uri, "Original", ctx=ctx) + + await service.fs.write(memory_uri, content=content, ctx=ctx, mode="replace") + + stored = await service.viking_fs.read_file(memory_uri, ctx=ctx) + mf = MemoryFileUtils.read(stored, uri=memory_uri) + assert mf.content == content + assert "resource_refs" not in mf.extra_fields + assert mf.links == [] + + +@pytest.mark.asyncio +async def test_memory_create_refreshes_nested_schema_overview(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_dir = f"viking://user/{ctx.user.user_space_name()}/memories/entities/动漫角色" + memory_uri = f"{memory_dir}/不二周助-link-test.md" + + result = await service.fs.write( + memory_uri, + content="用户保存了一张[不二周助](viking://resources/images/2026/06/10/不二周助_jpeg)的照片", + ctx=ctx, + mode="create", + wait=False, + ) + + overview = await service.viking_fs.read_file(f"{memory_dir}/.overview.md", ctx=ctx) + assert result["root_uri"] == memory_dir + assert "[不二周助-link-test](./不二周助-link-test.md)" in overview + + +@pytest.mark.asyncio +async def test_memory_rm_refreshes_nested_schema_overview(service): + ctx = RequestContext(user=service.user, role=Role.USER) + memory_dir = f"viking://user/{ctx.user.user_space_name()}/memories/entities/动漫角色" + deleted_uri = f"{memory_dir}/不二周助-delete-test.md" + kept_uri = f"{memory_dir}/越前龙马-keep-test.md" + + await service.fs.write( + deleted_uri, + content="用户保存了一张不二周助的照片", + ctx=ctx, + mode="create", + ) + await service.fs.write( + kept_uri, + content="用户保存了一张越前龙马的照片", + ctx=ctx, + mode="create", + ) + + before = await service.viking_fs.read_file(f"{memory_dir}/.overview.md", ctx=ctx) + assert "[不二周助-delete-test](./不二周助-delete-test.md)" in before + assert "[越前龙马-keep-test](./越前龙马-keep-test.md)" in before + + await service.fs.rm(deleted_uri, ctx=ctx) + + after = await service.viking_fs.read_file(f"{memory_dir}/.overview.md", ctx=ctx) + 
assert "不二周助-delete-test" not in after + assert "[越前龙马-keep-test](./越前龙马-keep-test.md)" in after + + class _FakeHandle: def __init__(self, handle_id: str): self.id = handle_id @@ -377,7 +545,7 @@ async def _fail_enqueue(**kwargs): @pytest.mark.asyncio -async def test_memory_write_timeout_after_enqueue_releases_write_lock(monkeypatch): +async def test_memory_write_wait_skips_semantic_queue_and_releases_write_lock(monkeypatch): file_uri = "viking://user/default/memories/preferences/theme.md" root_uri = "viking://user/default/memories/preferences" ctx = RequestContext(user=UserIdentifier.the_default_user(), role=Role.USER) @@ -394,27 +562,33 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, content, mode, ctx return None - async def _fake_enqueue_memory_refresh(**kwargs): + async def _fail_wait_for_request(*, telemetry_id, timeout): + del telemetry_id, timeout + raise AssertionError("memory write should not wait for semantic refresh") + + async def _fake_refresh_schema_overview(**kwargs): del kwargs return None - async def _fake_wait_for_request(*, telemetry_id, timeout): - del telemetry_id - raise DeadlineExceededError("queue processing", timeout) - monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) - monkeypatch.setattr(coordinator, "_wait_for_request", _fake_wait_for_request) + monkeypatch.setattr(coordinator, "_wait_for_request", _fail_wait_for_request) + monkeypatch.setattr( + "openviking.storage.content_write.MemoryUpdater.refresh_schema_overview", + _fake_refresh_schema_overview, + ) - with pytest.raises(DeadlineExceededError): - await coordinator.write( - uri=file_uri, - content="updated", - ctx=ctx, - wait=True, - ) + result = await coordinator.write( + uri=file_uri, + content="updated", + ctx=ctx, + wait=True, + ) assert lock_manager.release_calls == ["lock-1"] + assert result["semantic_status"] == "skipped" + assert 
result["vector_status"] == "skipped" + assert result["overview_status"] == "complete" + assert result["queue_status"] is None # Create-mode test helpers @@ -485,16 +659,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): write_calls.append((uri, content)) return content - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( @@ -529,7 +698,7 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): write_calls.append((uri, content)) return content - async def _fake_enqueue_memory_refresh(**kwargs): + async def _fake_refresh_schema_overview(**kwargs): refresh_calls.append(kwargs) return None @@ -538,8 +707,11 @@ async def _fake_wait_for_queues(*, timeout): return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) + monkeypatch.setattr( + "openviking.storage.content_write.MemoryUpdater.refresh_schema_overview", + _fake_refresh_schema_overview, + ) result = await coordinator.write( uri=input_uri, content="new content", mode="create", ctx=ctx, wait=True @@ -549,8 +721,7 @@ async def _fake_wait_for_queues(*, timeout): assert result["root_uri"] == root_uri assert result["context_type"] == "memory" assert write_calls == [(canonical_uri, "new content")] - assert refresh_calls[0]["root_uri"] == root_uri - assert refresh_calls[0]["modified_uri"] == canonical_uri + assert refresh_calls[0]["directory_uri"] == root_uri @pytest.mark.asyncio @@ -565,16 +736,11 @@ async def _fake_write_in_place(uri, 
content, *, mode, ctx): del uri, content, mode, ctx return None - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) with pytest.raises(AlreadyExistsError): @@ -593,16 +759,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, content, mode, ctx return None - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) with pytest.raises(InvalidArgumentError): @@ -627,16 +788,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): write_calls.append((uri, content)) return content - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( @@ -671,16 +827,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, mode, ctx return content - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, 
"_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( @@ -706,7 +857,7 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): refresh_calls = [] - async def _fake_enqueue_memory_refresh(**kwargs): + async def _fake_refresh_schema_overview(**kwargs): refresh_calls.append(kwargs) return None @@ -715,15 +866,17 @@ async def _fake_wait_for_queues(*, timeout): return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) + monkeypatch.setattr( + "openviking.storage.content_write.MemoryUpdater.refresh_schema_overview", + _fake_refresh_schema_overview, + ) result = await coordinator.write( uri=file_uri, content="content", mode="create", ctx=ctx, wait=True ) assert result["context_type"] == "memory" - assert refresh_calls[0]["root_uri"] == root_uri - assert refresh_calls[0]["modified_uri"] == file_uri + assert refresh_calls[0]["directory_uri"] == root_uri @pytest.mark.asyncio @@ -777,16 +930,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, content, ctx return None - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( @@ -813,16 +961,11 @@ async def _fake_write_in_place(uri, content, *, mode, ctx): del uri, content, ctx return None - async def _fake_enqueue_memory_refresh(**kwargs): - del kwargs - return None - async def _fake_wait_for_queues(*, timeout): del timeout return 
None monkeypatch.setattr(coordinator, "_write_in_place", _fake_write_in_place) - monkeypatch.setattr(coordinator, "_enqueue_memory_refresh", _fake_enqueue_memory_refresh) monkeypatch.setattr(coordinator, "_wait_for_queues", _fake_wait_for_queues) result = await coordinator.write( diff --git a/tests/server/test_filesystem_router.py b/tests/server/test_filesystem_router.py new file mode 100644 index 0000000000..5d6c5a1ddc --- /dev/null +++ b/tests/server/test_filesystem_router.py @@ -0,0 +1,35 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""Filesystem router tests.""" + +from types import SimpleNamespace + +import pytest + +from openviking.server.identity import RequestContext, Role +from openviking.server.routers import filesystem +from openviking_cli.session.user_id import UserIdentifier + + +@pytest.mark.asyncio +async def test_rm_preserves_memory_cleanup(monkeypatch): + cleanup = {"status": "success", "memory_uris": ["viking://user/alice/memories/entities/a.md"]} + + async def fake_rm(uri, ctx=None, recursive=False, wait=False, timeout=None): + return {"estimated_deleted_count": 1, "memory_cleanup": cleanup} + + monkeypatch.setattr( + filesystem, + "get_service", + lambda: SimpleNamespace(fs=SimpleNamespace(rm=fake_rm)), + ) + + response = await filesystem.rm( + uri="viking://resources/id_card.pdf", + recursive=True, + _ctx=RequestContext(user=UserIdentifier("acct", "alice"), role=Role.USER), + ) + + assert response.result["uri"] == "viking://resources/id_card.pdf" + assert response.result["estimated_deleted_count"] == 1 + assert response.result["memory_cleanup"] == cleanup diff --git a/tests/server/test_request_wait_tracking.py b/tests/server/test_request_wait_tracking.py index 8eacc3ff7a..3309da21d4 100644 --- a/tests/server/test_request_wait_tracking.py +++ b/tests/server/test_request_wait_tracking.py @@ -25,7 +25,8 @@ def __init__(self, queue_status): def register_request(self, telemetry_id: str) 
-> None: self.registered_requests.append(telemetry_id) - async def wait_for_request(self, telemetry_id: str, timeout): + async def wait_for_request(self, telemetry_id: str, timeout, poll_interval=None): + del poll_interval self.wait_calls.append((telemetry_id, timeout)) def build_queue_status(self, telemetry_id: str): @@ -264,7 +265,7 @@ async def test_content_write_wait_uses_request_tracker(monkeypatch): ) lock_manager = SimpleNamespace( create_handle=lambda: SimpleNamespace(id="lock-1"), - acquire_tree=lambda handle, path: _return_true(handle, path), + acquire_exact_path=lambda handle, path: _return_true(handle, path), release=lambda handle: _return_none(handle), ) @@ -324,7 +325,7 @@ async def test_content_write_wait_uses_request_tracker_when_telemetry_disabled(m ) lock_manager = SimpleNamespace( create_handle=lambda: SimpleNamespace(id="lock-1"), - acquire_tree=lambda handle, path: _return_true(handle, path), + acquire_exact_path=lambda handle, path: _return_true(handle, path), release=lambda handle: _return_none(handle), ) diff --git a/tests/service/test_fs_service.py b/tests/service/test_fs_service.py new file mode 100644 index 0000000000..acdb136b9d --- /dev/null +++ b/tests/service/test_fs_service.py @@ -0,0 +1,270 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
+# SPDX-License-Identifier: AGPL-3.0 +"""Tests for file-system service coordination behavior.""" + +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pytest + +from openviking.server.identity import RequestContext, Role +from openviking.service.fs_service import FSService +from openviking_cli.session.user_id import UserIdentifier + + +class _FakeVikingFS: + def __init__(self, *, rm_error=None): + self.rm_calls = [] + self.rm_error = rm_error + + async def rm(self, uri, recursive=False, ctx=None): + self.rm_calls.append({"uri": uri, "recursive": recursive, "ctx": ctx}) + if self.rm_error: + raise self.rm_error + return {"estimated_deleted_count": 3} + + +class _FakeResourceMemoryLinkService: + def __init__(self, result): + self.result = result + self.calls = [] + + async def before_resource_delete(self, *, ctx, resource_uri, recursive=False): + self.calls.append({"ctx": ctx, "resource_uri": resource_uri, "recursive": recursive}) + return self.result + + +class _FakeWaitTracker: + def __init__(self): + self.registered_requests = [] + self.registered_roots = [] + self.wait_calls = [] + self.cleaned = [] + + def register_request(self, telemetry_id): + self.registered_requests.append(telemetry_id) + + def register_semantic_root(self, telemetry_id, semantic_msg_id): + self.registered_roots.append( + { + "telemetry_id": telemetry_id, + "semantic_msg_id": semantic_msg_id, + "request_was_registered": telemetry_id in self.registered_requests, + } + ) + + async def wait_for_request(self, telemetry_id, timeout=None): + self.wait_calls.append((telemetry_id, timeout)) + + def build_queue_status(self, telemetry_id): + return { + "Semantic": {"processed": 1, "error_count": 0, "errors": []}, + "Embedding": {"processed": 0, "error_count": 0, "errors": []}, + } + + def mark_semantic_failed(self, telemetry_id, semantic_msg_id, message): + pass + + def cleanup(self, telemetry_id): + self.cleaned.append(telemetry_id) + + +class _FakeQueueManager: + 
SEMANTIC = "semantic" + + def __init__(self): + self.messages = [] + + def get_queue(self, name, allow_create=False): + assert name == self.SEMANTIC + assert allow_create is True + return self + + async def enqueue(self, msg): + self.messages.append(msg) + + +@pytest.fixture +def request_context(): + return RequestContext( + user=UserIdentifier("default", "ryoma"), + role=Role.USER, + ) + + +@pytest.mark.asyncio +async def test_resource_rm_enqueues_parent_delete_refresh_and_waits(request_context): + viking_fs = _FakeVikingFS() + service = FSService(viking_fs=viking_fs) + service._enqueue_delete_refresh = AsyncMock() + service._wait_for_refresh = AsyncMock(return_value={"Semantic": {"pending_count": 0}}) + + uri = "viking://resources/images/2026/06/10/不二周助_jpeg" + result = await service.rm( + uri, + ctx=request_context, + recursive=True, + wait=True, + timeout=12.0, + ) + + assert viking_fs.rm_calls == [{"uri": uri, "recursive": True, "ctx": request_context}] + service._enqueue_delete_refresh.assert_awaited_once_with( + root_uri="viking://resources/images/2026/06/10", + deleted_uri=uri, + context_type="resource", + ctx=request_context, + ) + service._wait_for_refresh.assert_awaited_once_with(timeout=12.0) + assert result["semantic_root_uri"] == "viking://resources/images/2026/06/10" + assert result["semantic_status"] == "complete" + assert result["queue_status"] == {"Semantic": {"pending_count": 0}} + + +@pytest.mark.asyncio +async def test_resource_rm_reports_failed_semantic_status_when_wait_queue_has_errors( + request_context, +): + viking_fs = _FakeVikingFS() + service = FSService(viking_fs=viking_fs) + service._enqueue_delete_refresh = AsyncMock() + service._wait_for_refresh = AsyncMock( + return_value={ + "Semantic": { + "processed": 1, + "error_count": 1, + "errors": [{"message": "refresh failed"}], + } + } + ) + + result = await service.rm( + "viking://resources/images/2026/06/10/不二周助_jpeg", + ctx=request_context, + recursive=True, + wait=True, + ) + + assert 
result["semantic_status"] == "failed" + + +@pytest.mark.asyncio +async def test_resource_rm_without_wait_only_queues_refresh(request_context): + viking_fs = _FakeVikingFS() + service = FSService(viking_fs=viking_fs) + service._enqueue_delete_refresh = AsyncMock() + service._wait_for_refresh = AsyncMock() + + uri = "viking://resources/images/2026/06/10/不二周助_jpeg" + result = await service.rm(uri, ctx=request_context, recursive=True) + + service._enqueue_delete_refresh.assert_awaited_once() + service._wait_for_refresh.assert_not_awaited() + assert result["semantic_status"] == "queued" + + +@pytest.mark.asyncio +async def test_resource_rm_wait_registers_request_before_semantic_root( + request_context, + monkeypatch, +): + viking_fs = _FakeVikingFS() + service = FSService(viking_fs=viking_fs) + tracker = _FakeWaitTracker() + queue_manager = _FakeQueueManager() + + monkeypatch.setattr( + "openviking.service.fs_service.get_current_telemetry", + lambda: SimpleNamespace(telemetry_id="tm-fs-rm"), + ) + monkeypatch.setattr( + "openviking.service.fs_service.get_request_wait_tracker", + lambda: tracker, + ) + monkeypatch.setattr( + "openviking.service.fs_service.get_queue_manager", + lambda: queue_manager, + ) + + result = await service.rm( + "viking://resources/images/2026/06/10/不二周助_jpeg", + ctx=request_context, + recursive=True, + wait=True, + timeout=3, + ) + + assert tracker.registered_requests == ["tm-fs-rm"] + assert tracker.registered_roots + assert tracker.registered_roots[0]["request_was_registered"] is True + assert tracker.wait_calls == [("tm-fs-rm", 3)] + assert tracker.cleaned == ["tm-fs-rm"] + assert result["semantic_status"] == "complete" + + +@pytest.mark.asyncio +async def test_resource_rm_does_not_cleanup_memory_if_resource_delete_fails(request_context): + delete_error = RuntimeError("delete failed") + viking_fs = _FakeVikingFS(rm_error=delete_error) + cleanup = { + "status": "success", + "memory_uris": ["viking://user/ryoma/memories/entities/动漫角色/越前龙马.md"], 
+ } + link_service = _FakeResourceMemoryLinkService(cleanup) + service = FSService( + viking_fs=viking_fs, + resource_memory_link_service=link_service, + ) + + with pytest.raises(RuntimeError, match="delete failed"): + await service.rm( + "viking://resources/images/2026/06/10/yueqian_jpeg", + ctx=request_context, + recursive=True, + ) + + assert link_service.calls == [] + + +@pytest.mark.asyncio +async def test_resource_rm_refreshes_memory_overview_for_cleaned_memories( + request_context, + monkeypatch, +): + cleanup = { + "status": "success", + "memory_uris": ["viking://user/ryoma/memories/entities/动漫角色/不二周助-write-test.md"], + "deleted_memory_uris": [ + "viking://user/ryoma/memories/entities/动漫角色/不二周助-link-test2.md" + ], + } + viking_fs = _FakeVikingFS() + link_service = _FakeResourceMemoryLinkService(cleanup) + service = FSService( + viking_fs=viking_fs, + resource_memory_link_service=link_service, + ) + service._enqueue_delete_refresh = AsyncMock() + + refreshed = [] + + async def fake_refresh_schema_overview(*, viking_fs, directory_uri, ctx): + refreshed.append({"viking_fs": viking_fs, "directory_uri": directory_uri, "ctx": ctx}) + + monkeypatch.setattr( + "openviking.service.fs_service.MemoryUpdater.refresh_schema_overview", + fake_refresh_schema_overview, + ) + + uri = "viking://resources/images/2026/06/11/不二周助_jpeg" + result = await service.rm(uri, ctx=request_context, recursive=True) + + assert link_service.calls == [{"ctx": request_context, "resource_uri": uri, "recursive": True}] + assert refreshed == [ + { + "viking_fs": viking_fs, + "directory_uri": "viking://user/ryoma/memories/entities/动漫角色", + "ctx": request_context, + } + ] + assert result["memory_cleanup"] == cleanup diff --git a/tests/service/test_resource_memory_link_service.py b/tests/service/test_resource_memory_link_service.py new file mode 100644 index 0000000000..45b12e32da --- /dev/null +++ b/tests/service/test_resource_memory_link_service.py @@ -0,0 +1,609 @@ +# Copyright (c) 2026 Beijing 
Volcano Engine Technology Co., Ltd. +# SPDX-License-Identifier: AGPL-3.0 +"""Tests for resource-memory linking service.""" + +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pytest + +from openviking.server.identity import RequestContext, Role +from openviking.service.resource_memory_link_service import ( + _RESOURCE_REASON_SESSION_ID, + ResourceMemoryLinkService, +) +from openviking.session.memory.dataclass import MemoryFile +from openviking.session.memory.memory_updater import MemoryUpdateResult +from openviking.session.memory.utils.memory_file_utils import MemoryFileUtils +from openviking_cli.session.user_id import UserIdentifier + + +class _FakeVikingFS: + def __init__(self, store): + self.store = store + self.rm_calls = [] + + async def read_file(self, uri, ctx=None): + return self.store[uri] + + async def write_file(self, uri, content, ctx=None): + self.store[uri] = content + + async def rm(self, uri, recursive=False, ctx=None, lock_handle=None): + self.rm_calls.append((uri, recursive)) + self.store.pop(uri, None) + + async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): + prefix = uri.rstrip("/") + "/" + return [ + { + "uri": item_uri, + "rel_path": item_uri.removeprefix(prefix), + "isDir": False, + } + for item_uri in self.store + if item_uri.startswith(prefix) + ] + + +class _ReadFailVikingFS: + async def read_file(self, uri, ctx=None): + raise RuntimeError("storage unavailable") + + async def tree(self, uri, ctx=None, node_limit=None, level_limit=None): + memory_uri = "viking://user/alice/memories/entities/wang.md" + return [{"uri": memory_uri, "rel_path": "entities/wang.md", "isDir": False}] + + +class _FakeSession: + def __init__(self): + self.messages = [] + self.meta = SimpleNamespace(memory_policy=None) + + def add_messages(self, specs): + self.messages.extend(specs) + + +class _FakeSessionService: + def __init__(self): + self.session = _FakeSession() + self.created = [] + self.got = [] + 
self.committed = [] + self.deleted = [] + + async def create(self, ctx, session_id=None, memory_policy=None): + self.created.append( + { + "ctx": ctx, + "session_id": session_id, + "memory_policy": memory_policy, + } + ) + return self.session + + async def get(self, session_id, ctx, auto_create=False): + self.got.append( + { + "ctx": ctx, + "session_id": session_id, + "auto_create": auto_create, + } + ) + return self.session + + async def commit_async(self, session_id, ctx, keep_recent_count=0): + archive_index = len(self.committed) + 1 + self.committed.append( + { + "ctx": ctx, + "session_id": session_id, + "keep_recent_count": keep_recent_count, + } + ) + return { + "task_id": None, + "archive_uri": ( + f"viking://user/alice/sessions/{session_id}/history/archive_{archive_index:03d}" + ), + } + + async def delete(self, session_id, ctx): + self.deleted.append({"ctx": ctx, "session_id": session_id}) + + +@pytest.fixture +def request_context(): + return RequestContext( + user=UserIdentifier("acct", "alice"), + role=Role.USER, + ) + + +@pytest.mark.asyncio +async def test_on_resource_added_bridges_reason_through_fixed_session(request_context): + resource_uri = "viking://resources/images/2026/06/11/yueqian_jpeg" + session_service = _FakeSessionService() + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS( + {"viking://resources/images/2026/06/11/.abstract.md": "动漫角色照片合集"} + ), + session_service=session_service, + ) + + result = await service.on_resource_added( + ctx=request_context, + resource_uri=resource_uri, + reason="这是越前龙马的照片", + source_name="yueqian.jpeg", + ) + + session_id = result["session_id"] + assert result["status"] == "success" + assert session_id == _RESOURCE_REASON_SESSION_ID + assert session_service.got == [ + { + "ctx": request_context, + "session_id": session_id, + "auto_create": True, + } + ] + assert session_service.created == [] + assert session_service.session.meta.memory_policy == { + "self": {"enabled": True}, + "peer": {"enabled": 
False}, + "memory_types": ["entities", "events", "preferences"], + } + assert session_service.committed == [ + { + "ctx": request_context, + "session_id": session_id, + "keep_recent_count": 0, + } + ] + assert session_service.deleted == [] + message_text = session_service.session.messages[0]["parts"][0].text + assert resource_uri in message_text + assert "这是越前龙马的照片" in message_text + assert "yueqian.jpeg" in message_text + assert "动漫角色照片合集" in message_text + + +@pytest.mark.asyncio +async def test_on_resource_added_reuses_same_reason_session(request_context): + session_service = _FakeSessionService() + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS({}), + session_service=session_service, + ) + + first = await service.on_resource_added( + ctx=request_context, + resource_uri="viking://resources/images/ryoma.jpeg", + reason="这是越前龙马的照片", + source_name="ryoma.jpeg", + ) + second = await service.on_resource_added( + ctx=request_context, + resource_uri="viking://resources/images/fuji.jpeg", + reason="这是不二周助的照片", + source_name="fuji.jpeg", + ) + + assert first["session_id"] == _RESOURCE_REASON_SESSION_ID + assert second["session_id"] == _RESOURCE_REASON_SESSION_ID + assert [call["session_id"] for call in session_service.got] == [ + _RESOURCE_REASON_SESSION_ID, + _RESOURCE_REASON_SESSION_ID, + ] + assert [call["session_id"] for call in session_service.committed] == [ + _RESOURCE_REASON_SESSION_ID, + _RESOURCE_REASON_SESSION_ID, + ] + assert session_service.deleted == [] + messages = [item["parts"][0].text for item in session_service.session.messages] + assert "这是越前龙马的照片" in messages[0] + assert "这是不二周助的照片" in messages[1] + + +@pytest.mark.asyncio +async def test_on_resource_added_routes_reason_to_actor_peer(request_context): + peer_ctx = RequestContext( + user=request_context.user, + role=request_context.role, + actor_peer_id="web-visitor-alice", + ) + session_service = _FakeSessionService() + service = ResourceMemoryLinkService( + 
viking_fs=_FakeVikingFS({}), + session_service=session_service, + ) + + result = await service.on_resource_added( + ctx=peer_ctx, + resource_uri="viking://resources/images/ryoma.jpeg", + reason="这是越前龙马的照片", + source_name="ryoma.jpeg", + ) + + assert result["session_id"] == _RESOURCE_REASON_SESSION_ID + assert session_service.session.meta.memory_policy == { + "self": {"enabled": False}, + "peer": {"enabled": True}, + "memory_types": ["entities", "events", "preferences"], + } + assert session_service.session.messages[0]["peer_id"] == "web-visitor-alice" + assert session_service.committed == [ + { + "ctx": peer_ctx, + "session_id": _RESOURCE_REASON_SESSION_ID, + "keep_recent_count": 0, + } + ] + + +@pytest.mark.asyncio +async def test_on_resource_added_routes_peer_resource_uri_to_peer(request_context): + resource_uri = "viking://user/alice/peers/web-visitor-alice/resources/images/ryoma.jpeg" + session_service = _FakeSessionService() + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS({}), + session_service=session_service, + ) + + await service.on_resource_added( + ctx=request_context, + resource_uri=resource_uri, + reason="这是越前龙马的照片", + source_name="ryoma.jpeg", + ) + + assert session_service.session.meta.memory_policy == { + "self": {"enabled": False}, + "peer": {"enabled": True}, + "memory_types": ["entities", "events", "preferences"], + } + assert session_service.session.messages[0]["peer_id"] == "web-visitor-alice" + + +@pytest.mark.asyncio +async def test_read_resource_directory_abstract_uses_parent_abstract(request_context): + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS({"viking://resources/images/.abstract.md": "动漫角色照片合集"}) + ) + + abstract = await service._read_resource_directory_abstract( + "viking://resources/images/yueqian.jpeg", + request_context, + ) + + assert abstract == "动漫角色照片合集" + + +@pytest.mark.asyncio +async def test_read_resource_directory_abstract_ignores_missing_or_not_ready( + request_context, +): + service = 
ResourceMemoryLinkService(viking_fs=_FakeVikingFS({})) + + missing = await service._read_resource_directory_abstract( + "viking://resources/images/yueqian.jpeg", + request_context, + ) + + assert missing == "" + + service = ResourceMemoryLinkService( + viking_fs=_FakeVikingFS( + { + "viking://resources/images/.abstract.md": ( + "# viking://resources/images [Directory abstract is not ready]" + ) + } + ) + ) + + not_ready = await service._read_resource_directory_abstract( + "viking://resources/images/yueqian.jpeg", + request_context, + ) + + assert not_ready == "" + + +@pytest.mark.asyncio +async def test_find_referencing_memories_uses_memory_refs(request_context): + memory_uri = "viking://user/alice/memories/entities/wang.md" + resource_uri = "viking://resources/docs/id_card.pdf" + raw = ( + "王大锤资料。\n\n" + "" + ) + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS({memory_uri: raw})) + + matches = await service._find_referencing_memories( + ctx=request_context, + resource_uri="viking://resources/docs", + recursive=True, + ) + + assert len(matches) == 1 + assert matches[0].memory_uri == memory_uri + assert matches[0].resource_ref["resource_uri"] == resource_uri + + +@pytest.mark.asyncio +async def test_find_referencing_memories_scans_actor_peer_memory(request_context): + peer_ctx = RequestContext( + user=request_context.user, + role=request_context.role, + actor_peer_id="web-visitor-alice", + ) + memory_uri = "viking://user/alice/peers/web-visitor-alice/memories/entities/wang.md" + resource_uri = "viking://resources/docs/id_card.pdf" + raw = ( + "王大锤资料。\n\n" + "" + ) + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS({memory_uri: raw})) + + matches = await service._find_referencing_memories( + ctx=peer_ctx, + resource_uri=resource_uri, + recursive=True, + ) + + assert len(matches) == 1 + assert matches[0].memory_uri == memory_uri + assert matches[0].resource_ref["resource_uri"] == resource_uri + + +@pytest.mark.asyncio +async def 
test_before_resource_delete_removes_refs_when_cleanup_has_no_changes(request_context): + memory_uri = "viking://user/alice/memories/entities/wang.md" + resource_uri = "viking://resources/id_card.pdf" + raw = ( + "王大锤资料。\n\n" + "" + ) + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS({memory_uri: raw})) + service._cleanup_memory_reference = AsyncMock(return_value=MemoryUpdateResult()) + + result = await service.before_resource_delete( + ctx=request_context, + resource_uri=resource_uri, + ) + + assert result["status"] == "success" + mf = MemoryFileUtils.read(service._get_viking_fs().store[memory_uri], uri=memory_uri) + assert "resource_refs" not in mf.extra_fields + + +@pytest.mark.asyncio +async def test_cleanup_memory_reference_does_not_introduce_schema_metadata(request_context): + memory_uri = "viking://user/ryoma/memories/entities/动漫角色/不二周助-write-test3.md" + resource_uri = "viking://resources/images/2026/06/10/不二周助_jpeg" + original_raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=f"今天是清明节。[用户保存了一张不二周助的照片]({resource_uri})", + extra_fields={ + "resource_refs": [ + { + "resource_uri": resource_uri, + "source": "content.write", + } + ] + }, + ) + ) + store = {memory_uri: original_raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + + result = await service._cleanup_memory_reference( + ctx=request_context, + memory_uri=memory_uri, + memory_file=MemoryFileUtils.read(original_raw, uri=memory_uri), + resource_uri=resource_uri, + reason="", + ) + + assert result.edited_uris == [memory_uri] + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.content == "今天是清明节。" + assert mf.extra_fields == {} + assert mf.memory_type is None + + +@pytest.mark.asyncio +async def test_cleanup_memory_reference_deletes_empty_memory_shell( + request_context, + monkeypatch, +): + memory_uri = "viking://user/ryoma/memories/entities/动漫角色/越前龙马.md" + resource_uri = "viking://resources/images/2026/06/11/yueqian_jpeg" + 
original_raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=f"[用户保存了一张越前龙马的照片]({resource_uri})", + extra_fields={ + "category": "动漫角色", + "name": "越前龙马", + "user_id": "ryoma", + "memory_type": "entities", + }, + ) + ) + store = {memory_uri: original_raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + refresh_overview = AsyncMock() + monkeypatch.setattr( + "openviking.service.resource_memory_link_service.MemoryUpdater.refresh_schema_overview", + refresh_overview, + ) + + result = await service._cleanup_memory_reference( + ctx=request_context, + memory_uri=memory_uri, + memory_file=MemoryFileUtils.read(original_raw, uri=memory_uri), + resource_uri=resource_uri, + reason="这是越前龙马的照片", + ) + + assert memory_uri not in store + assert service._get_viking_fs().rm_calls == [(memory_uri, False)] + assert result.edited_uris == [] + assert result.deleted_uris == [memory_uri] + refresh_overview.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_before_resource_delete_cleans_visible_uri_without_resource_refs( + request_context, + monkeypatch, +): + memory_uri = "viking://user/alice/memories/events/2026/06/11/yueqian.md" + resource_uri = "viking://resources/images/2026/06/12/yueqian_jpeg" + raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=( + f"今天是清明节。\n用户昨晚查看了[越前龙马照片]({resource_uri}),之后可参考该资源。" + ), + extra_fields={"memory_type": "events"}, + ) + ) + store = {memory_uri: raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + refresh_overview = AsyncMock() + monkeypatch.setattr( + "openviking.service.resource_memory_link_service.MemoryUpdater.refresh_schema_overview", + refresh_overview, + ) + + result = await service.before_resource_delete( + ctx=request_context, + resource_uri=resource_uri, + ) + + assert result["status"] == "success" + assert result["memory_uris"] == [memory_uri] + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.content == "今天是清明节。" + 
assert "resource_refs" not in mf.extra_fields + + +@pytest.mark.asyncio +async def test_before_resource_delete_exact_keeps_child_resource_refs( + request_context, +): + memory_uri = "viking://user/alice/memories/entities/photos.md" + resource_uri = "viking://resources/images/album" + child_uri = f"{resource_uri}/child.jpeg" + raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=( + f"用户保存了[相册资源]({resource_uri})。\n用户保存了[相册里的子图]({child_uri})。" + ), + extra_fields={ + "resource_refs": [ + {"resource_uri": resource_uri, "source": "content.write"}, + {"resource_uri": child_uri, "source": "content.write"}, + ], + }, + ) + ) + store = {memory_uri: raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + + result = await service.before_resource_delete( + ctx=request_context, + resource_uri=resource_uri, + recursive=False, + ) + + assert result["status"] == "success" + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert f"[相册资源]({resource_uri})" not in mf.content + assert f"[相册里的子图]({child_uri})" in mf.content + refs = mf.extra_fields["resource_refs"] + assert refs == [{"resource_uri": child_uri, "source": "content.write"}] + + +@pytest.mark.asyncio +async def test_before_resource_delete_deletes_previous_failed_cleanup_artifact( + request_context, + monkeypatch, +): + memory_uri = "viking://user/alice/memories/events/2026/06/11/yueqian.md" + resource_uri = "viking://resources/images/2026/06/12/yueqian_jpeg" + raw = MemoryFileUtils.write( + MemoryFile( + uri=memory_uri, + content=( + f"Summary: 用户查看了[越前龙马照片]({resource_uri})。\n" + "None ChatLog:\n" + f"[[user]: Deleted resource URI:]({resource_uri})\n" + "Original reason: \n" + f"Memory URI: {memory_uri}" + ), + extra_fields={"memory_type": "events"}, + ) + ) + store = {memory_uri: raw} + service = ResourceMemoryLinkService(viking_fs=_FakeVikingFS(store)) + refresh_overview = AsyncMock() + monkeypatch.setattr( + 
"openviking.service.resource_memory_link_service.MemoryUpdater.refresh_schema_overview", + refresh_overview, + ) + + result = await service.before_resource_delete( + ctx=request_context, + resource_uri=resource_uri, + ) + + assert result["status"] == "success" + assert result["deleted_memory_uris"] == [memory_uri] + assert memory_uri not in store + refresh_overview.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_assert_resource_unlinked_propagates_non_not_found_errors(request_context): + service = ResourceMemoryLinkService(viking_fs=_ReadFailVikingFS()) + + with pytest.raises(RuntimeError, match="storage unavailable"): + await service._assert_resource_unlinked( + "viking://user/alice/memories/entities/wang.md", + "viking://resources/id_card.pdf", + request_context, + ) diff --git a/tests/session/memory/test_memory_react_system_prompt.py b/tests/session/memory/test_memory_react_system_prompt.py index e14f5e2743..26a1a6ab9a 100644 --- a/tests/session/memory/test_memory_react_system_prompt.py +++ b/tests/session/memory/test_memory_react_system_prompt.py @@ -50,6 +50,37 @@ def test_instruction_explains_peer_memory_routing(self): assert "profile/preferences/entities/events" in instruction assert "cases/patterns/tools/skills" in instruction + def test_instruction_omits_resource_uri_handling_without_resource_uri(self): + provider = SessionExtractContextProvider( + messages=[Message(id="m1", role="user", parts=[TextPart("我喜欢越前龙马。")])] + ) + + instruction = provider.instruction() + + assert "Resource URI Handling" not in instruction + + def test_instruction_includes_resource_uri_handling_for_user_scoped_resource_uri(self): + provider = SessionExtractContextProvider( + messages=[ + Message( + id="m1", + role="user", + parts=[ + TextPart( + "这张图是越前龙马:" + "viking://user/ryoma/peers/fuji/resources/images/yueqian_jpeg" + ) + ], + ) + ] + ) + + instruction = provider.instruction() + + assert "Resource URI Handling" in instruction + assert 
"viking://user/{user_id}/resources/..." in instruction + assert "viking://user/{user_id}/peers/{peer_id}/resources/..." in instruction + class TestSkillToolCallExposure: def test_assemble_conversation_includes_skill_tool_call(self): diff --git a/tests/session/memory/test_memory_updater.py b/tests/session/memory/test_memory_updater.py index 82161d7667..1a548c2c79 100644 --- a/tests/session/memory/test_memory_updater.py +++ b/tests/session/memory/test_memory_updater.py @@ -35,6 +35,7 @@ MemoryFileUtils, parse_memory_file_with_fields, ) +from openviking_cli.exceptions import NotFoundError from openviking_cli.session.user_id import UserIdentifier @@ -101,6 +102,43 @@ def test_extract_context_initializes_page_id_map(self): page_id = extract_context.page_id_map.get_page_id("viking://user/a/memories/profile.md") assert page_id == 1 + def test_extract_context_resource_event_content_hides_add_resource_fields(self): + resource_uri = "viking://resources/images/2026/06/12/yueqian_jpeg" + extract_context = ExtractContext( + messages=[ + Message( + id="1", + role="user", + parts=[ + TextPart( + text=( + "## Resource Addition\n" + f"Resource URI: {resource_uri}\n" + "Source name: yueqian.jpeg\n" + "Added at: 2026-06-12T03:43:36.343325+00:00\n" + "Resource abstract: This directory contains an anime illustration.\n" + "User reason: 这是越前龙马的照片" + ) + ) + ], + created_at="2026-06-12T03:43:36.343325+00:00", + ) + ] + ) + + content = extract_context.get_resource_event_content( + "0", + f"2026-06-12,用户保存了粉丝创作的越前龙马动漫插画资源,资源URI为{resource_uri}。", + ) + + assert ( + content == f"2026-06-12,[用户保存了粉丝创作的越前龙马动漫插画资源]({resource_uri})。" + ) + assert "Resource URI" not in content + assert "Added at" not in content + assert "Resource abstract" not in content + assert "User reason" not in content + def test_create(self): """Test creating a MemoryUpdater.""" updater = MemoryUpdater() @@ -125,6 +163,81 @@ def test_set_registry(self): assert updater._registry == registry + @pytest.mark.asyncio + async 
def test_generate_overview_deletes_empty_overview_via_rm(self): + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + filename_template="{{ name }}.md", + fields=[], + overview_template="overview", + ) + registry = MagicMock() + registry.get.return_value = schema + + class FakeVikingFS: + def __init__(self): + self.rm_calls = [] + + async def ls(self, uri, show_all_hidden=False, ctx=None): + return [{"name": ".overview.md", "isDir": False}] + + async def rm(self, uri, recursive=False, ctx=None, lock_handle=None): + self.rm_calls.append((uri, recursive)) + + viking_fs = FakeVikingFS() + updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=viking_fs) + ctx = RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.generate_overview( + "entities", + "viking://user/alice/memories/entities/动漫角色", + ctx, + ) + + assert viking_fs.rm_calls == [ + ("viking://user/alice/memories/entities/动漫角色/.overview.md", False), + ("viking://user/alice/memories/entities/动漫角色", True), + ] + + @pytest.mark.asyncio + async def test_generate_overview_skips_deleted_directory(self): + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + filename_template="{{ name }}.md", + fields=[], + overview_template="overview", + ) + registry = MagicMock() + registry.get.return_value = schema + + class FakeVikingFS: + def __init__(self): + self.rm_calls = [] + + async def ls(self, uri, show_all_hidden=False, ctx=None): + raise NotFoundError(uri, "directory") + + async def rm(self, uri, recursive=False, ctx=None, lock_handle=None): + self.rm_calls.append((uri, recursive)) + + viking_fs = FakeVikingFS() + updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=viking_fs) + ctx = 
RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.generate_overview( + "entities", + "viking://user/alice/memories/entities/动漫角色", + ctx, + ) + + assert viking_fs.rm_calls == [] + @pytest.mark.asyncio async def test_apply_operations_preserves_pre_resolved_multi_uris_for_new_page_ids(self): registry = MagicMock() @@ -277,9 +390,13 @@ async def test_apply_operations_skips_link_updates_for_deleted_uris(self, monkey updater.generate_overview = AsyncMock() mock_viking_fs = MagicMock() - mock_viking_fs.read_file = AsyncMock( - side_effect=AssertionError("deleted URI should not be read") - ) + + async def mock_read_file(uri, **kwargs): + if uri == deleted_uri: + raise AssertionError("deleted URI should not be read") + return MemoryFileUtils.write(MemoryFile(uri=uri, content="new content")) + + mock_viking_fs.read_file = AsyncMock(side_effect=mock_read_file) mock_viking_fs.write_file = AsyncMock() updater._get_viking_fs = MagicMock(return_value=mock_viking_fs) @@ -318,7 +435,9 @@ async def mock_apply_delete(uri, ctx): assert result.written_uris == [written_uri] assert result.deleted_uris == [deleted_uri] - mock_viking_fs.read_file.assert_not_awaited() + assert deleted_uri not in [ + call.args[0] for call in mock_viking_fs.read_file.await_args_list + ] @pytest.mark.asyncio async def test_apply_operations_routes_backlinks_to_matching_uri_only(self): @@ -392,6 +511,203 @@ async def mock_write_file(uri, content, **kwargs): assert [link["to_uri"] for link in caroline["backlinks"]] == [caroline_uri] assert melanie.get("backlinks", []) == [] + @pytest.mark.asyncio + async def test_apply_operations_does_not_write_backlinks_to_resource_targets(self): + memory_uri = "viking://user/alice/memories/entities/wang.md" + resource_uri = "viking://resources/id_card.pdf" + + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + filename_template="{{ name 
}}.md", + fields=[], + overview_template="overview", + ) + registry = MagicMock() + registry.get.return_value = schema + + store = {} + mock_viking_fs = MagicMock() + + async def mock_read_file(uri, **kwargs): + if uri == resource_uri: + raise AssertionError("resource target should not be read as a memory file") + return store.get(uri) + + async def mock_write_file(uri, content, **kwargs): + if uri == resource_uri: + raise AssertionError("resource target should not be written as a memory file") + store[uri] = content + + mock_viking_fs.read_file = mock_read_file + mock_viking_fs.write_file = mock_write_file + + updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=mock_viking_fs) + updater._vectorize_memories = AsyncMock() + updater.generate_overview = AsyncMock() + + operations = ResolvedOperations( + upsert_operations=[ + ResolvedOperation( + memory_fields={ + "name": "王大锤", + "content": "王大锤的身份证资料见资源。", + }, + memory_type="entities", + uris=[memory_uri], + page_id=100, + ) + ], + delete_file_contents=[], + errors=[], + resolved_links=[ + StoredLink( + from_uri=memory_uri, + to_uri=resource_uri, + link_type="references_resource", + match_text="资源", + ) + ], + ) + + ctx = RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.apply_operations(operations=operations, ctx=ctx) + + memory = parse_memory_file_with_fields(store[memory_uri]) + assert memory["links"][0]["to_uri"] == resource_uri + assert resource_uri not in store + + @pytest.mark.asyncio + async def test_apply_operations_syncs_markdown_resource_refs_before_vectorize(self): + memory_uri = "viking://user/alice/memories/entities/fuji.md" + resource_uri = "viking://resources/images/2026/06/11/fuji_jpeg" + + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + filename_template="{{ name }}.md", + fields=[], + overview_template="overview", + ) + 
registry = MagicMock() + registry.get.return_value = schema + + store = {} + mock_viking_fs = MagicMock() + + async def mock_read_file(uri, **kwargs): + return store.get(uri) + + async def mock_write_file(uri, content, **kwargs): + store[uri] = content + + async def assert_vectorized_after_resource_ref_sync(*args, **kwargs): + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.extra_fields["resource_refs"][0]["source"] == "session.commit" + + mock_viking_fs.read_file = mock_read_file + mock_viking_fs.write_file = mock_write_file + + updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=mock_viking_fs) + updater._vectorize_memories = AsyncMock( + side_effect=assert_vectorized_after_resource_ref_sync + ) + updater.generate_overview = AsyncMock() + + operations = ResolvedOperations( + upsert_operations=[ + ResolvedOperation( + memory_fields={ + "name": "不二周助", + "content": f"用户保存了一张[不二周助]({resource_uri})的照片", + }, + memory_type="entities", + uris=[memory_uri], + page_id=100, + ) + ], + delete_file_contents=[], + errors=[], + ) + ctx = RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.apply_operations(operations=operations, ctx=ctx) + + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.content == f"用户保存了一张[不二周助]({resource_uri})的照片" + assert mf.links == [] + assert mf.extra_fields["resource_refs"] == [ + { + "resource_uri": resource_uri, + "source": "session.commit", + "created_at": mf.extra_fields["resource_refs"][0]["created_at"], + "match_text": "不二周助", + } + ] + + @pytest.mark.asyncio + async def test_apply_operations_linkifies_bare_resource_uri(self): + memory_uri = "viking://user/alice/memories/entities/fuji.md" + resource_uri = "viking://resources/images/2026/06/11/fuji_jpeg" + + schema = MemoryTypeSchema( + memory_type="entities", + description="entity memory", + directory="viking://user/{{ user_space }}/memories/entities", + 
filename_template="{{ name }}.md", + fields=[], + overview_template="overview", + ) + registry = MagicMock() + registry.get.return_value = schema + + store = {} + mock_viking_fs = MagicMock() + + async def mock_read_file(uri, **kwargs): + return store.get(uri) + + async def mock_write_file(uri, content, **kwargs): + store[uri] = content + + mock_viking_fs.read_file = mock_read_file + mock_viking_fs.write_file = mock_write_file + + updater = MemoryUpdater(registry=registry) + updater._get_viking_fs = MagicMock(return_value=mock_viking_fs) + updater._vectorize_memories = AsyncMock() + updater.generate_overview = AsyncMock() + + operations = ResolvedOperations( + upsert_operations=[ + ResolvedOperation( + memory_fields={ + "name": "不二周助", + "content": f"今天是清明节。用户保存了一张不二周助的照片 {resource_uri}", + }, + memory_type="entities", + uris=[memory_uri], + page_id=100, + ) + ], + delete_file_contents=[], + errors=[], + ) + ctx = RequestContext(user=UserIdentifier("acme", "alice"), role=Role.USER) + + await updater.apply_operations(operations=operations, ctx=ctx) + + mf = MemoryFileUtils.read(store[memory_uri], uri=memory_uri) + assert mf.content == f"今天是清明节。[用户保存了一张不二周助的照片]({resource_uri})" + assert mf.extra_fields["resource_refs"][0]["resource_uri"] == resource_uri + assert mf.extra_fields["resource_refs"][0]["source"] == "session.commit" + assert mf.extra_fields["resource_refs"][0]["match_text"] == "用户保存了一张不二周助的照片" + # The TestApplyWriteWithContentInFields tests are outdated because WriteOp no longer exists # The _apply_write method now accepts any flat model (dict or Pydantic model) that diff --git a/tests/session/memory/test_resource_refs.py b/tests/session/memory/test_resource_refs.py new file mode 100644 index 0000000000..ae9933f112 --- /dev/null +++ b/tests/session/memory/test_resource_refs.py @@ -0,0 +1,89 @@ +# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd. 
+# SPDX-License-Identifier: AGPL-3.0 + +from openviking.session.memory.dataclass import MemoryFile +from openviking.session.memory.utils.resource_refs import ( + contains_resource_uri, + extract_resource_uris, + sync_memory_resource_refs, +) + + +def test_extract_resource_uris_stops_at_common_sentence_delimiters(): + cases = [ + ( + "看了 viking://resources/images/foo.jpeg,觉得不错", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg。还看了别的", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg;然后记录", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg!真的好", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg?真的好", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg、还有别的", + "viking://resources/images/foo.jpeg", + ), + ( + "看了 viking://resources/images/foo.jpeg)然后记录", + "viking://resources/images/foo.jpeg", + ), + ( + "read viking://resources/images/foo.jpeg, then commented", + "viking://resources/images/foo.jpeg", + ), + ] + + for content, expected in cases: + assert extract_resource_uris(content) == [expected] + + +def test_extract_resource_uris_does_not_match_resource_prefix_words(): + cases = [ + "viking://resources2/images/foo", + "viking://resources-old/images/foo", + "viking://user/alice/resources2/images/foo", + "viking://user/alice/resources-old/images/foo", + "viking://user/alice/peers/bob/resources2/images/foo", + "viking://user/alice/peers/bob/resources-old/images/foo", + ] + + for content in cases: + assert not contains_resource_uri(content) + assert extract_resource_uris(content) == [] + + +def test_sync_memory_resource_refs_keeps_bare_uri_clean_before_chinese_punctuation(): + resource_uri = "viking://resources/images/2026/06/12/yueqian_jpeg" + mf = MemoryFile( + content=( + f"昨天晚上我看了 {resource_uri},这张图是越前龙马的照片。" + "以后提到越前龙马照片,可以参考这个资源。" + ), + extra_fields={}, + ) + + 
changed = sync_memory_resource_refs(mf, source="session.commit") + + assert changed is True + assert f"]({resource_uri}),这张图是越前龙马的照片。" in mf.content + refs = mf.extra_fields["resource_refs"] + assert refs == [ + { + "resource_uri": resource_uri, + "source": "session.commit", + "created_at": refs[0]["created_at"], + "match_text": "昨天晚上我看了", + } + ] diff --git a/tests/test_link_renderer.py b/tests/test_link_renderer.py index e4c03c737f..429a25438b 100644 --- a/tests/test_link_renderer.py +++ b/tests/test_link_renderer.py @@ -339,6 +339,11 @@ def test_no_links(self): result = LinkRenderer.strip_links(content) assert result == content + def test_strip_all_links_removes_viking_uri_targets_for_embedding(self): + content = "用户上传了一张[越前龙马](viking://resources/images/yueqian_jpeg)的照片。" + result = LinkRenderer.strip_all_links(content) + assert result == "用户上传了一张越前龙马的照片。" + class TestRoundTrip: def test_render_then_strip(self): diff --git a/tests/unit/service/test_core_encryption_startup.py b/tests/unit/service/test_core_encryption_startup.py index e05a753c2f..dfe1d7c4db 100644 --- a/tests/unit/service/test_core_encryption_startup.py +++ b/tests/unit/service/test_core_encryption_startup.py @@ -8,7 +8,6 @@ import pytest -from openviking.pyagfs.exceptions import AGFSNotFoundError from openviking.service.core import OpenVikingService from openviking.utils.agfs_utils import RagfsBindingConfig @@ -67,40 +66,6 @@ async def _bootstrap(config: dict) -> _FakeEncryptor: assert isinstance(service._encryptor, _FakeEncryptor) -@pytest.mark.parametrize( - ("encrypted_mode", "raw", "message"), - [ - (True, b"{}", "plaintext"), - (False, b"OVE1ciphertext", "encrypted"), - ], -) -def test_probe_storage_shape_rejects_mode_mismatch(encrypted_mode, raw, message): - """Reject existing system metadata whose shape differs from current encryption mode.""" - - class _Client: - def read_raw(self, path: str) -> bytes: - assert path == "/local/_system/accounts.json" - return raw - - service = 
OpenVikingService.__new__(OpenVikingService) - - with pytest.raises(RuntimeError, match=message): - service._probe_storage_shape(_Client(), encrypted_mode) - - -def test_probe_storage_shape_allows_empty_system(): - """Treat missing system metadata as a fresh system.""" - - class _Client: - def read_raw(self, path: str) -> bytes: - assert path == "/local/_system/accounts.json" - raise AGFSNotFoundError("not found") - - service = OpenVikingService.__new__(OpenVikingService) - - service._probe_storage_shape(_Client(), encrypted_mode=True) - - def test_ensure_data_dir_lock_acquired_once(monkeypatch, tmp_path): """Acquire the data-dir lock once before startup encryption bootstrap.""" diff --git a/tests/unit/session/memory/test_embedding_template.py b/tests/unit/session/memory/test_embedding_template.py index 8f39655925..cbf8de5891 100644 --- a/tests/unit/session/memory/test_embedding_template.py +++ b/tests/unit/session/memory/test_embedding_template.py @@ -33,7 +33,10 @@ def setup(self): def test_events_exposes_embedding_template(self): schema = self.registry.get("events") - assert schema.embedding_template == "{{ event_name }}\n\n{{ goal }}\n\n{{ content }}" + assert ( + schema.embedding_template + == "EventName: {{ event_name }}\nGoal: {{ goal }}\n{{ content }}" + ) def test_preferences_exposes_embedding_template(self): schema = self.registry.get("preferences") diff --git a/tests/unit/test_extra_headers_vlm.py b/tests/unit/test_extra_headers_vlm.py index 1f0687df47..c7c12eaf06 100644 --- a/tests/unit/test_extra_headers_vlm.py +++ b/tests/unit/test_extra_headers_vlm.py @@ -316,6 +316,52 @@ def run_in_thread_loop(): assert result == [worker_loop_client] assert build_async_client.call_count == 2 + @patch("volcenginesdkarkruntime.Ark") + def test_volcengine_sync_client_applies_timeout_and_disables_sdk_retries( + self, + mock_ark_class, + ): + mock_ark_class.return_value = MagicMock() + + vlm = VolcEngineVLM( + { + "api_key": "sk-test", + "api_base": 
"https://ark.cn-beijing.volces.com/api/v3", + "timeout": 12.0, + "max_retries": 5, + } + ) + + _ = vlm.get_client() + + mock_ark_class.assert_called_once() + call_kwargs = mock_ark_class.call_args[1] + assert call_kwargs["timeout"] == 12.0 + assert call_kwargs["max_retries"] == 0 + + @patch("volcenginesdkarkruntime.AsyncArk") + def test_volcengine_async_client_applies_timeout_and_disables_sdk_retries( + self, + mock_async_ark_class, + ): + mock_async_ark_class.return_value = MagicMock() + + vlm = VolcEngineVLM( + { + "api_key": "sk-test", + "api_base": "https://ark.cn-beijing.volces.com/api/v3", + "timeout": 12.0, + "max_retries": 5, + } + ) + + _ = vlm._build_async_client() + + mock_async_ark_class.assert_called_once() + call_kwargs = mock_async_ark_class.call_args[1] + assert call_kwargs["timeout"] == 12.0 + assert call_kwargs["max_retries"] == 0 + @patch("openviking.models.vlm.backends.openai_vlm.openai.AzureOpenAI") def test_azure_sync_client_disables_sdk_retries(self, mock_azure_openai_class): mock_azure_openai_class.return_value = MagicMock()