diff --git a/src/llm/io_processing/gptoss/harmony.cpp b/src/llm/io_processing/gptoss/harmony.cpp index 8457b951fd..76f8ee9191 100644 --- a/src/llm/io_processing/gptoss/harmony.cpp +++ b/src/llm/io_processing/gptoss/harmony.cpp @@ -97,7 +97,7 @@ ToolCalls_t Harmony::getToolCalls() { static const std::string tool_prefix = "to=functions."; ToolCalls_t toolCalls; for (const auto& msg : messages) { - if (startsWith(msg.getChannel(), "commentary")) { + if (startsWith(msg.getChannel(), "commentary") || startsWith(msg.getChannel(), "analysis")) { size_t marker = msg.getChannel().find(tool_prefix); if (marker != std::string::npos) { marker += tool_prefix.length(); diff --git a/src/llm/io_processing/gptoss/tool_parser.cpp b/src/llm/io_processing/gptoss/tool_parser.cpp index 1a2e19d45e..fc26195cda 100644 --- a/src/llm/io_processing/gptoss/tool_parser.cpp +++ b/src/llm/io_processing/gptoss/tool_parser.cpp @@ -86,9 +86,11 @@ std::optional GptOssToolParser::parseChunk(const std::strin std::string chunk = newChunk; std::optional result; - if (chunk.find(getParsingStartTags()[0]) != std::string::npos) { - toolCallIndex++; // starting with -1, first call will be 0 - return std::nullopt; + for (const auto& parsingStartTag : getParsingStartTags()) { + if (chunk.find(parsingStartTag) != std::string::npos) { + toolCallIndex++; // starting with -1, first call will be 0 + return std::nullopt; + } } // This should only happen during channel read if model does not produce garbage @@ -158,6 +160,7 @@ std::optional GptOssToolParser::parseChunk(const std::strin // Cut everything after first . // Remove and take only remaining part // The harmony format is: <|channel|>commentary to=functions. <|constrain|>json<|message|>{...}<|call|> + // HACK: Not part of the OpenAI Harmony format, but a tool call addressed with <|channel|>analysis to=... is accepted 
as well std::size_t pos = chunk.find('.'); if (pos != std::string::npos) { chunk = chunk.substr(pos + 1); diff --git a/src/llm/io_processing/gptoss/tool_parser.hpp b/src/llm/io_processing/gptoss/tool_parser.hpp index c7015b809d..faa4143e7d 100644 --- a/src/llm/io_processing/gptoss/tool_parser.hpp +++ b/src/llm/io_processing/gptoss/tool_parser.hpp @@ -58,7 +58,10 @@ class GptOssToolParser : public BaseOutputParser { std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; const std::vector& getParsingStartTags() const override { - static const std::vector parsingStartTags{parsingStartTag}; + static const std::vector parsingStartTags{ + parsingStartTag, + "<|channel|>analysis to=", // HACK: Not part of the OpenAI Harmony format; accepted because tests show the model sometimes produces tool calls in the analysis channel + }; return parsingStartTags; } diff --git a/src/test/llm/output_parsers/gptoss_output_parser_test.cpp b/src/test/llm/output_parsers/gptoss_output_parser_test.cpp index 83a9318df7..0a5b617544 100644 --- a/src/test/llm/output_parsers/gptoss_output_parser_test.cpp +++ b/src/test/llm/output_parsers/gptoss_output_parser_test.cpp @@ -288,6 +288,36 @@ TEST_F(GptOssOutputUnaryParserTest, SingleToolCallWithConstrain) { } } +// Not part of the OpenAI Harmony format; covered because tests show the model sometimes produces tool calls in the analysis channel. Each variant must yield exactly one tool call and empty content and reasoning. +TEST_F(GptOssOutputUnaryParserTest, SingleToolCallWithConstrainInAnalysisChannel) { + for (auto closureToken : std::vector{ + Harmony::TokenID::RETURN, // ending with <|return|> + Harmony::TokenID::END, // ending with <|end|> + Harmony::TokenID::CALL}) { // ending with <|call|> + for (auto functionDeclaration : std::vector{ + "analysis to=functions.hello", // valid channel with to= + "analysis to=functions.hello ", + "analysis to=functions.hello", + "analysis ANYTHING IN BETWEEN to=functions.hello", + }) { // variants: trailing space after hello, arbitrary text before to= + builder + .clear() + 
.add(Harmony::TokenID::CHANNEL) + .add(functionDeclaration) + .add(Harmony::TokenID::MESSAGE) + .add(R"({"Hello": "world!"})") + .add(closureToken); + Harmony harmony(*gptOssTokenizer, builder.build()); + ASSERT_TRUE(harmony.parse()) << "Failed for closure token: " << static_cast(closureToken) << " function declaration: " << functionDeclaration; + ASSERT_EQ(harmony.getContent(), "") << "Failed for closure token: " << static_cast(closureToken) << " function declaration: " << functionDeclaration; + ASSERT_EQ(harmony.getReasoning(), "") << "Failed for closure token: " << static_cast(closureToken) << " function declaration: " << functionDeclaration; + ASSERT_EQ(harmony.getToolCalls().size(), 1) << "Failed for closure token: " << static_cast(closureToken) << " function declaration: " << functionDeclaration; + ASSERT_EQ(harmony.getToolCalls()[0].name, "hello") << "Failed for closure token: " << static_cast(closureToken) << " function declaration: " << functionDeclaration; + ASSERT_EQ(harmony.getToolCalls()[0].arguments, R"({"Hello": "world!"})") << "Failed for closure token: " << static_cast(closureToken) << " function declaration: " << functionDeclaration; + } + } +} + TEST_F(GptOssOutputUnaryParserTest, InvalidSingleToolCallWithConstrain) { for (auto closureToken : std::vector{ Harmony::TokenID::RETURN, // ending with <|return|> @@ -576,3 +606,63 @@ TEST_F(GptOssOutputStreamParserTest, HolisticStreamingTools) { }; test(chunkToDeltaVec); } + +// Not part of the OpenAI Harmony format; covered because tests show the model sometimes produces tool calls in the analysis channel. Streams reasoning, a commentary preamble, then two tool calls addressed through the analysis channel. +TEST_F(GptOssOutputStreamParserTest, HolisticStreamingToolsInAnalysisChannel) { + std::vector>> chunkToDeltaVec{ + // Reasoning (analysis channel without to=, expected as reasoning_content deltas) + {"<|channel|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"analysis", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"<|message|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"I", ov::genai::GenerationFinishReason::NONE, 
{R"({"delta":{"reasoning_content":"I"}})"}}, + {" will", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"reasoning_content":" will"}})"}}, + {" call", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"reasoning_content":" call"}})"}}, + {" fun", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"reasoning_content":" fun"}})"}}, + {"ction.", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"reasoning_content":"ction."}})"}}, + {"<|end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + // Preamble (commentary channel without to=, expected as content deltas) + {"<|channel|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"commentary", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"<|message|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"I", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"content":"I"}})"}}, + {" have", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"content":" have"}})"}}, + {" to", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"content":" to"}})"}}, + {" call", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"content":" call"}})"}}, + {" fun", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"content":" fun"}})"}}, + {"ction.", ov::genai::GenerationFinishReason::NONE, {R"({"delta":{"content":"ction."}})"}}, + {"<|end|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + // Tool 1 (the tool name delta is expected at <|message|>) + {"<|channel|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"analysis", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" to=", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"fun", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ctions", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {".hello ", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"<|message|>", ov::genai::GenerationFinishReason::NONE, "{\"delta\":{\"tool_calls\":[{\"id\":\"XXXXXXXXX\",\"type\":\"function\",\"index\":0,\"function\":{\"name\":\"hello\"}}]}}"}, + {" {\"", 
ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" {\""}}]}})"}, + {"location", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]}})"}, + {"\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":"}}]}})"}, + {" \"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \""}}]}})"}, + {"Paris", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"Paris"}}]}})"}, + {"\"}", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]}})"}, + {"<|call|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + // Tool 2 (with ignored constrain: the tool name delta is expected at <|constrain|>, the json chunk yields no delta) + {"<|channel|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"analysis", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" to=", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"fun", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"ctions", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {".world ", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"<|constrain|>", ov::genai::GenerationFinishReason::NONE, "{\"delta\":{\"tool_calls\":[{\"id\":\"XXXXXXXXX\",\"type\":\"function\",\"index\":1,\"function\":{\"name\":\"world\"}}]}}"}, + {"json", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"<|message|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" {\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":" {\""}}]}})"}, + {"location", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"location"}}]}})"}, + {"\":", ov::genai::GenerationFinishReason::NONE, 
R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"\":"}}]}})"}, + {" \"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":" \""}}]}})"}, + {"Warsaw", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"Warsaw"}}]}})"}, + {"\"}", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":1,"function":{"arguments":"\"}"}}]}})"}, + {"<|call|>", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + }; + test(chunkToDeltaVec); +}