diff --git a/apps/lobehub-compatibility/MirothinkerToolParser.py b/apps/lobehub-compatibility/MirothinkerToolParser.py new file mode 100644 index 00000000..ec12fe0f --- /dev/null +++ b/apps/lobehub-compatibility/MirothinkerToolParser.py @@ -0,0 +1,440 @@ +""" +Tool parser plugin for vLLM that converts the MiroThinker MCP tool-call format into OpenAI-compatible tool calls. +MCP format: +<use_mcp_tool> +<server_name>server name</server_name> +<tool_name>tool name</tool_name> +<arguments> +{...} +</arguments> +</use_mcp_tool> +""" + +import json +from collections.abc import Sequence +import json_repair +import regex as re + +from vllm.entrypoints.chat_utils import make_tool_call_id +from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, + DeltaFunctionCall, + DeltaMessage, + DeltaToolCall, + ExtractedToolCallInformation, + FunctionCall, + ToolCall, +) +from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( + ToolParser, + ToolParserManager, +) +from vllm.logger import init_logger + +logger = init_logger(__name__) + + +class MirothinkerToolParser(ToolParser): + def __init__(self, tokenizer): + super().__init__(tokenizer) + + # State tracking for streaming + self.current_tool_name_sent: bool = False + self.prev_tool_call_arr: list[dict] = [] + self.current_tool_id: int = -1 + self.streamed_args_for_tool: list[str] = [] + self.buffer: str = "" # Buffer for potential tool call tags + self._resolved_tool_name_cache: dict[tuple[str, str], str] = {} + + # Correctness-first streaming state (incremental state machine) + self._stream_mode: str = "text" # "text" | "tool" + self._text_token_prefix: str = "" # held-back tail that may be the start of tool_call_start_token + self._tool_end_token_prefix: str = "" # held-back tail that may be the start of tool_call_end_token + self._tool_block_buffer: str = ( + "" # accumulates text between the start and end tokens + ) + self._stream_tool_call_ids: list[str] = [] + + # Token definitions + self.tool_call_start_token: str = "" + self.tool_call_end_token: str = "" + + # Regex patterns + self.tool_call_regex = re.compile( + r"\s*" + r"(.*?)\s*" + r"(.*?)\s*" + r"\s*(.*?)\s*\s*" + r"", + re.DOTALL, + ) + + # For 
streaming partial tool calls + # IMPORTANT: Use GREEDY matching (.*) for arguments to capture all content + # in streaming mode. We'll clean up the closing arguments tag in the code if present. + # The outer ()? makes the whole section optional + # The inner (.*) will match empty string if the arguments tag exists but has no content yet + self.partial_tool_regex = re.compile( + r"\s*" + r"(?:(.*?)\s*)?" + r"(?:(.*?)\s*)?" + r"(?:(\s*.*))?", # Move \s* inside capture group so empty match returns "" + re.DOTALL, + ) + + # For correctness-first parsing on COMPLETE tool blocks only + self._complete_tool_block_regex = re.compile( + r"\s*" + r"(?:(.*?)\s*)?" + r"(?:(.*?)\s*)?" + r"(?:\s*(.*?)\s*(?:\s*)?)?" + r"", + re.DOTALL, + ) + + def _resolve_tool_name( + self, server_name: str, tool_name: str, request: ChatCompletionRequest + ) -> str: + """ + Map (server_name, tool_name) to a tool name registered in request.tools; + fall back to tool_name when server_name is empty/'default' or nothing matches. + """ + if not server_name or server_name == "default": + return tool_name + + if not request or not request.tools: + return tool_name + + cache_key = (server_name, tool_name) + cached = self._resolved_tool_name_cache.get(cache_key) + if cached: + return cached + + # Collect registered tool names that contain tool_name as a substring + candidates = [] + for tool in request.tools: + if hasattr(tool, "function") and hasattr(tool.function, "name"): + name = tool.function.name + if tool_name in name: + candidates.append(name) + if len(candidates) == 1: + resolved = candidates[0] + self._resolved_tool_name_cache[cache_key] = resolved + return resolved + # Several candidates: pick the first one that also contains server_name + for candidate in candidates: + if server_name in candidate: + logger.debug( + "Resolved tool %s -> %s (server: %s)", + tool_name, + candidate, + server_name, + ) + self._resolved_tool_name_cache[cache_key] = candidate + return candidate + + return tool_name + + def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest: + request = super().adjust_request(request) + if request.tools and 
request.tool_choice != "none": + # Do not skip special tokens for proper tool parsing + request.skip_special_tokens = False + return request + + def _ensure_tool_id_valid(self, tool_id: int) -> bool: + """Ensure the tool_id is valid and arrays have enough elements""" + if tool_id < 0: + return False + + # Ensure arrays are large enough + while len(self.streamed_args_for_tool) <= tool_id: + self.streamed_args_for_tool.append("") + while len(self.prev_tool_call_arr) <= tool_id: + self.prev_tool_call_arr.append({}) + + return True + + def extract_tool_calls( + self, + model_output: str, + request: ChatCompletionRequest, + ) -> ExtractedToolCallInformation: + # Sanity check; avoid unnecessary processing + if logger.isEnabledFor(10): # DEBUG + logger.debug("model_output len=%s", len(model_output)) + if ( + self.tool_call_start_token not in model_output + or request.tool_choice == "none" + or not request.tools + ): + return ExtractedToolCallInformation( + tools_called=False, tool_calls=[], content=model_output + ) + + try: + tool_calls = [] + had_any_match = False + had_parse_error = False + # Find all complete tool calls + for match in self.tool_call_regex.finditer(model_output): + had_any_match = True + server_name = match.group(1).strip() + tool_name = match.group(2).strip() + arguments_str = match.group(3).strip() + + # Resolve tool name + tool_name = self._resolve_tool_name(server_name, tool_name, request) + + try: + # Parse arguments as JSON + arguments = json.loads(arguments_str) + + tool_call = ToolCall( + type="function", + function=FunctionCall( + name=tool_name, + arguments=json.dumps(arguments, ensure_ascii=False), + ), + ) + tool_calls.append(tool_call) + + except json.JSONDecodeError: + try: + repaired = json_repair.repair_json(arguments_str) + if not repaired: + had_parse_error = True + logger.warning( + "Failed to repair tool arguments JSON: %s", + arguments_str, + ) + continue + + arguments = json.loads(repaired) + tool_call = ToolCall( + 
type="function", + function=FunctionCall( + name=tool_name, + arguments=json.dumps(arguments, ensure_ascii=False), + ), + ) + tool_calls.append(tool_call) + except Exception: + had_parse_error = True + logger.warning( + "Failed to parse tool arguments after repair: %s", + arguments_str, + ) + continue + + # If we couldn't successfully parse tool calls (or format didn't match), do not truncate. + # Return the full model output as content to avoid losing text. + if had_parse_error or not tool_calls or not had_any_match: + return ExtractedToolCallInformation( + tools_called=False, tool_calls=[], content=model_output + ) + + # Extract content before first tool call + content = model_output[: model_output.find(self.tool_call_start_token)] + + return ExtractedToolCallInformation( + tools_called=len(tool_calls) > 0, + tool_calls=tool_calls, + content=content if content else None, + ) + + except Exception: + logger.exception("Error in extracting tool call from response.") + return ExtractedToolCallInformation( + tools_called=False, tool_calls=[], content=model_output + ) + + def extract_tool_calls_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + request: ChatCompletionRequest, + ) -> DeltaMessage | None: + # Reset state if this is the start of a new request + if not previous_text: + self.current_tool_name_sent = False + self.prev_tool_call_arr = [] + self.current_tool_id = -1 + self.streamed_args_for_tool = [] + self.buffer = "" + self._resolved_tool_name_cache = {} + + self._stream_mode = "text" + self._text_token_prefix = "" + self._tool_end_token_prefix = "" + self._tool_block_buffer = "" + self._stream_tool_call_ids = [] + + # If tools are disabled for this request, do not suppress tags or parse tool calls. + # Flush any internal buffers as plain text so we never drop output. 
+ if request.tool_choice == "none" or not request.tools: + out = "" + if self.buffer: + out += self.buffer + self.buffer = "" + if self._text_token_prefix: + out += self._text_token_prefix + self._text_token_prefix = "" + if self._tool_block_buffer: + out += self.tool_call_start_token + self._tool_block_buffer + self._tool_block_buffer = "" + if self._tool_end_token_prefix: + out += self._tool_end_token_prefix + self._tool_end_token_prefix = "" + out += delta_text + return DeltaMessage(content=out) if out else None + + def _longest_token_prefix_at_end(s: str, token: str) -> str: + max_len = min(len(token) - 1, len(s)) + for i in range(max_len, 0, -1): + if token.startswith(s[-i:]): + return s[-i:] + return "" + + emitted_text_parts: list[str] = [] + emitted_tool_calls: list[DeltaToolCall] = [] + + chunk = delta_text + + while chunk: + if self._stream_mode == "text": + if self._text_token_prefix: + chunk = self._text_token_prefix + chunk + self._text_token_prefix = "" + + start_idx = chunk.find(self.tool_call_start_token) + if start_idx < 0: + prefix = _longest_token_prefix_at_end( + chunk, self.tool_call_start_token + ) + if prefix: + safe = chunk[: -len(prefix)] + if safe: + emitted_text_parts.append(safe) + self._text_token_prefix = prefix + else: + emitted_text_parts.append(chunk) + break + + before = chunk[:start_idx] + if before: + emitted_text_parts.append(before) + chunk = chunk[start_idx + len(self.tool_call_start_token) :] + self._stream_mode = "tool" + self._tool_block_buffer = "" + self._tool_end_token_prefix = "" + continue + + # tool mode + if self._tool_end_token_prefix: + chunk = self._tool_end_token_prefix + chunk + self._tool_end_token_prefix = "" + + end_idx = chunk.find(self.tool_call_end_token) + if end_idx < 0: + prefix = _longest_token_prefix_at_end(chunk, self.tool_call_end_token) + if prefix: + self._tool_block_buffer += chunk[: -len(prefix)] + self._tool_end_token_prefix = prefix + else: + self._tool_block_buffer += chunk + break + + # 
Complete tool block + self._tool_block_buffer += chunk[:end_idx] + tool_block = ( + self.tool_call_start_token + + self._tool_block_buffer + + self.tool_call_end_token + ) + remainder = chunk[end_idx + len(self.tool_call_end_token) :] + + # Reset tool buffers before parsing + self._stream_mode = "text" + self._tool_block_buffer = "" + self._tool_end_token_prefix = "" + + try: + m = self._complete_tool_block_regex.search(tool_block) + if not m: + emitted_text_parts.append(tool_block) + chunk = remainder + continue + + server_name = (m.group(1) or "").strip() + tool_name = (m.group(2) or "").strip() + arguments_str = (m.group(3) or "").strip() + + if not tool_name: + emitted_text_parts.append(tool_block) + chunk = remainder + continue + + resolved_name = ( + self._resolve_tool_name(server_name, tool_name, request) + if server_name + else tool_name + ) + + # Finalize arguments strictly at end of the block + if not arguments_str: + arguments_json_str = "{}" + else: + try: + arguments_obj = json.loads(arguments_str) + except Exception: + repaired = json_repair.repair_json(arguments_str) + if not repaired: + emitted_text_parts.append(tool_block) + chunk = remainder + continue + arguments_obj = json.loads(repaired) + arguments_json_str = json.dumps(arguments_obj, ensure_ascii=False) + + tool_index = len(self._stream_tool_call_ids) + tool_call_id = make_tool_call_id() + self._stream_tool_call_ids.append(tool_call_id) + + emitted_tool_calls.append( + DeltaToolCall( + index=tool_index, + type="function", + id=tool_call_id, + function=DeltaFunctionCall( + name=resolved_name, + arguments=arguments_json_str, + ).model_dump(exclude_none=True), + ) + ) + + except Exception: + logger.exception( + "Error parsing complete tool block in streaming; falling back to plain text." 
+ ) + emitted_text_parts.append(tool_block) + + chunk = remainder + + emitted_text = "".join(emitted_text_parts) if emitted_text_parts else None + if emitted_text is not None and emitted_text == "": + emitted_text = None + if emitted_text is None and not emitted_tool_calls: + return None + + # vLLM's DeltaMessage.tool_calls is validated as a list; do not pass None explicitly. + if emitted_tool_calls: + return DeltaMessage(content=emitted_text, tool_calls=emitted_tool_calls) + return DeltaMessage(content=emitted_text) + + +# Register the tool parser to ToolParserManager +ToolParserManager.register_module("mirothinker", True, MirothinkerToolParser) diff --git a/apps/lobehub-compatibility/chat_template.jinja b/apps/lobehub-compatibility/chat_template.jinja new file mode 100644 index 00000000..825df661 --- /dev/null +++ b/apps/lobehub-compatibility/chat_template.jinja @@ -0,0 +1,95 @@ +{%- if tools %} + {{- '<|im_start|>system\n' }} + {%- if messages[0].role == 'system' %} + {{messages[0].content + "\n\n" }} + {%- endif %} + {{- "# Tool-Use Formatting Instructions\n\nTool-use is formatted using XML-style tags. The tool-use is enclosed in and each parameter is similarly enclosed within its own set of tags.\n\nThe Model Context Protocol (MCP) connects to servers that provide additional tools and resources to extend your capabilities. You can use the server's tools via the `use_mcp_tool`.\n\nDescription:\nRequest to use a tool provided by a MCP server. Each MCP server can provide multiple tools with different capabilities. 
Tools have defined input schemas that specify required and optional parameters.\n\nParameters:\n- server_name: (required) The name of the MCP server providing the tool.\n- tool_name: (required) The name of the tool to execute\n- arguments: (required) A JSON object containing the tool's input parameters, following the tool's input schema, quotes within string must be properly escaped, ensure it's valid JSON\n\nUsage:\n\nserver name here\ntool name here\n\n{{\n \"param1\": \"value1\",\n \"param2\": \"value2\"\n}}\n\n\n\nImportant Notes:\n- Tool-use must be placed **at the end** of your response, **top-level**, and not nested within other tags.\n- Always adhere to this format for the tool use to ensure proper parsing and execution.\n\nString and scalar parameters should be specified as is, while lists and objects should use JSON format. Note that spaces for string values are not stripped. The output is not expected to be valid XML and is parsed with regular expressions.\nHere are the functions available in JSONSchema format:\n\n## Server name: default" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {{- "\n" }} + {%- endfor %} + {{- "\n# General Objective\n\nYou accomplish a given task iteratively, breaking it down into clear steps and working through them methodically." 
}} +{%- else %} + {%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} +{%- for forward_message in messages %} + {%- set index = (messages|length - 1) - loop.index0 %} + {%- set message = messages[index] %} + {%- set current_content = message.content if message.content is not none else '' %} + {%- set tool_start = '' %} + {%- set tool_start_length = tool_start|length %} + {%- set start_of_message = current_content[:tool_start_length] %} + {%- set tool_end = '' %} + {%- set tool_end_length = tool_end|length %} + {%- set start_pos = (current_content|length) - tool_end_length %} + {%- if start_pos < 0 %} + {%- set start_pos = 0 %} + {%- endif %} + {%- set end_of_message = current_content[start_pos:] %} + {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %} + {%- set ns.multi_step_tool = false %} + {%- set ns.last_query_index = index %} + {%- endif %} +{%- endfor %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {%- set content = message.content %} + {%- set reasoning_content = '' %} + {%- if message.reasoning_content is defined and message.reasoning_content is not none %} + {%- set reasoning_content = message.reasoning_content %} + {%- else %} + {%- if '' in message.content %} + {%- set content = (message.content.split('')|last).lstrip('\n') %} + {%- set reasoning_content = (message.content.split('')|first).rstrip('\n') %} + {%- set reasoning_content = (reasoning_content.split('')|last).lstrip('\n') %} + {%- endif %} + {%- endif %} + {%- if loop.index0 > ns.last_query_index %} + {{- '<|im_start|>' + message.role + '\n\n' + 
reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- else %} + {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content.strip('\n') + '\n\n\n' + content.lstrip('\n') }} + {%- endif %} + {%- if message.tool_calls %} + {%- for tool_call in message.tool_calls %} + {%- if (loop.first and content) or (not loop.first) %} + {{- '\n' }} + {%- endif %} + {%- if tool_call.function %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\ndefault\n' }} + {{- tool_call.name }} + {{- '\n\n' }} + {%- if tool_call.arguments is string %} + {{- tool_call.arguments }} + {%- else %} + {{- tool_call.arguments | tojson }} + {%- endif %} + {{- '\n\n' }} + {%- endfor %} + {%- endif %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- '\n\n\n\n' }} + {%- endif %} +{%- endif %} \ No newline at end of file diff --git a/apps/lobehub-compatibility/img/AI-provider.png b/apps/lobehub-compatibility/img/AI-provider.png new file mode 100644 index 00000000..8ca18da9 Binary files /dev/null and b/apps/lobehub-compatibility/img/AI-provider.png differ diff --git a/apps/lobehub-compatibility/img/model.png b/apps/lobehub-compatibility/img/model.png new file mode 100644 index 00000000..dfaa4c11 Binary files /dev/null and b/apps/lobehub-compatibility/img/model.png differ diff --git a/apps/lobehub-compatibility/img/presentation.gif b/apps/lobehub-compatibility/img/presentation.gif new file mode 100644 index 00000000..ceefee66 Binary files /dev/null and b/apps/lobehub-compatibility/img/presentation.gif differ diff --git 
a/apps/lobehub-compatibility/img/settings.png b/apps/lobehub-compatibility/img/settings.png new file mode 100644 index 00000000..7d823bf5 Binary files /dev/null and b/apps/lobehub-compatibility/img/settings.png differ diff --git a/apps/lobehub-compatibility/readme.md b/apps/lobehub-compatibility/readme.md new file mode 100644 index 00000000..5a00f686 --- /dev/null +++ b/apps/lobehub-compatibility/readme.md @@ -0,0 +1,62 @@ +# LobeChat Integration Guide + +This guide describes how to integrate the MiroThinker model with [LobeChat](https://github.com/lobehub/lobe-chat), an open-source, modern LLM UI framework supporting tool usage (function calling). + +## 1. Start the Inference Service + +First, launch the MiroThinker model using vLLM with the OpenAI-compatible API adapter. Ensure you include the tool parser plugin. + +```bash +# Configuration +PORT=61002 +MODEL_PATH=miromind-ai/MiroThinker-v1.5-30B + +# Start vLLM server +vllm serve $MODEL_PATH \ + --served-model-name mirothinker \ + --port $PORT \ + --trust-remote-code \ + --chat-template chat_template.jinja \ + --tool-parser-plugin MirothinkerToolParser.py \ + --tool-call-parser mirothinker \ + --enable-auto-tool-choice +``` + +## 2. Configure LobeChat + +You can use either the self-hosted version or the [web application](https://lobechat.com/chat). + +### Step 1: Access Settings + +Navigate to **Settings** -> **AI Service Provider** to add a custom AI service provider. + +![Settings Navigation](img/settings.png) + +### Step 2: Add Custom AI Provider + +Click the `+` button to add a new provider and configure it as follows: + +![Add AI Provider](img/AI-provider.png) + +| Field | Value | Description | +| :--- | :--- | :--- | +| **Provider ID** | `miromind` | Or any identifier you prefer. | +| **Request Format** | `OPENAI` | | +| **API Key** | `your-api-key` | Use any string if auth is disabled. | +| **API Proxy Address** | `http://localhost:61002/v1` | Replace with your actual service address. 
| + +### Step 3: Configure the Model + +After adding the provider, add the models you have deployed to the provider's model list: + +1. Add a new model with the ID `mirothinker` (must match `--served-model-name`). +2. **Crucial**: Enable the **Function Calling** capability toggle. +3. Click "Check" to verify connectivity. + +![Model Configuration](img/model.png) + +## 3. Usage Demo + +Once configured, you can use MiroThinker in LobeChat with full tool-calling capabilities. + +![Presentation Demo](img/presentation.gif) diff --git a/apps/lobehub-compatibility/requirements.txt b/apps/lobehub-compatibility/requirements.txt new file mode 100644 index 00000000..04efc82c --- /dev/null +++ b/apps/lobehub-compatibility/requirements.txt @@ -0,0 +1,3 @@ +vllm>=0.11.0 +json-repair +regex \ No newline at end of file