From 97886acc9b974d1a2daa77810f7c0e19a32025cc Mon Sep 17 00:00:00 2001 From: Farook Al-Sammarraie Date: Tue, 18 Nov 2025 00:24:13 +0300 Subject: [PATCH 1/2] added json and language validators for IFEval --- WORKSPACE | 10 + flutter/cpp/datasets/ifeval_utils/BUILD | 2 + flutter/cpp/datasets/ifeval_utils/json.h | 489 ++++++++++++++++++++++ flutter/cpp/datasets/ifeval_utils/types.h | 92 +--- third_party/cld2.BUILD | 40 ++ 5 files changed, 548 insertions(+), 85 deletions(-) create mode 100644 flutter/cpp/datasets/ifeval_utils/json.h create mode 100644 third_party/cld2.BUILD diff --git a/WORKSPACE b/WORKSPACE index 324fb6bb0..148512d1c 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -60,6 +60,16 @@ http_archive( ], ) +http_archive( + name = "cld2", + build_file = "@//third_party:cld2.BUILD", + sha256 = "6d8681eadbb64d8fcd3c6c620e81bed16d99ff75627324d66ee2d54bf2b7d749", + strip_prefix = "cld2-b56fa78a2fe44ac2851bae5bf4f4693a0644da7b", + urls = [ + "https://github.com/CLD2Owners/cld2/archive/b56fa78a2fe44ac2851bae5bf4f4693a0644da7b.zip", + ], +) + http_archive( name = "org_tensorflow", patch_args = ["-p1"], diff --git a/flutter/cpp/datasets/ifeval_utils/BUILD b/flutter/cpp/datasets/ifeval_utils/BUILD index dde8f8a72..934da1687 100644 --- a/flutter/cpp/datasets/ifeval_utils/BUILD +++ b/flutter/cpp/datasets/ifeval_utils/BUILD @@ -23,6 +23,7 @@ cc_library( hdrs = [ "common.h", "types.h", + "json.h", ], copts = select({ "//flutter/android/commonlibs:use_asan": [ @@ -34,5 +35,6 @@ cc_library( "//conditions:default": [], }), deps = [ + "@cld2//:cld2", ], ) diff --git a/flutter/cpp/datasets/ifeval_utils/json.h b/flutter/cpp/datasets/ifeval_utils/json.h new file mode 100644 index 000000000..9c45aedea --- /dev/null +++ b/flutter/cpp/datasets/ifeval_utils/json.h @@ -0,0 +1,489 @@ +/*BSD 3-Clause License + +Copyright (c) 2014-2017, ipkn + 2020-2025, CrowCpp +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the author nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace crow { +namespace json { + +enum class type { + Null, + False, + True, + Number, + String, + List, + Object +}; + +enum class num_type { + Null, + Signed_integer, + Unsigned_integer, + Floating_point +}; + +class rvalue { + public: + using list = std::vector; + using object = std::map; + + rvalue() : t_(type::Null), nt_(num_type::Null), error_(true) {} // default = invalid + + explicit operator bool() const noexcept { return !error_; } + + type t() const noexcept { return t_; } + num_type nt() const noexcept { return nt_; } + + // Accessors (no bounds checking, for brevity) + const list& as_list() const { return list_; } + const object& as_object() const { return object_; } + const std::string& as_string() const { return str_; } + + int64_t as_i() const { return i_; } + uint64_t as_u() const { return u_; } + double as_d() const { return d_; } + + // Convenience index operators (no error checking) + const rvalue& operator[](std::size_t idx) const { + static rvalue invalid; + if (t_ != type::List || idx >= list_.size()) + return invalid; + return list_[idx]; + } + + const rvalue& operator[](const std::string& key) const { + static rvalue invalid; + if (t_ != type::Object) return invalid; + auto it = object_.find(key); + if (it == object_.end()) return invalid; + return it->second; + } + + bool is_valid() const noexcept { return !error_; } + + private: + friend class parser; + + void set_error() noexcept { error_ = true; } + + void set_null() noexcept { + t_ = type::Null; + nt_ = num_type::Null; + } + + void set_bool(bool v) noexcept { + t_ = v ? type::True : type::False; + nt_ = num_type::Null; + } + + void set_number_signed(int64_t v) noexcept { + t_ = type::Number; + nt_ = num_type::Signed_integer; + i_ = v; + u_ = static_cast(v); + d_ = static_cast(v); + } + + void set_number_unsigned(uint64_t v) noexcept { + t_ = type::Number; + nt_ = num_type::Unsigned_integer; + u_ = v; + i_ = static_cast(v); + d_ = static_cast(v); + } + + void set_number_double(double v) noexcept { + t_ = type::Number; + nt_ = num_type::Floating_point; + d_ = v; + i_ = static_cast(v); + u_ = static_cast(v); + } + + void set_string(std::string v) { + t_ = type::String; + nt_ = num_type::Null; + str_ = std::move(v); + } + + void set_list(list v) { + t_ = type::List; + nt_ = num_type::Null; + list_ = std::move(v); + } + + void set_object(object v) { + t_ = type::Object; + nt_ = num_type::Null; + object_ = std::move(v); + } + + type t_; + num_type nt_; + bool error_ = false; + + // basic storage + int64_t i_ = 0; + uint64_t u_ = 0; + double d_ = 0.0; + std::string str_; + list list_; + object object_; +}; + +class parser { + public: + parser(const char* begin, std::size_t size) + : cur_(begin), end_(begin + size) {} + + rvalue parse() { + rvalue rv; + skip_ws(); + parse_value(rv); + if (!rv) return rv; + + skip_ws(); + if (cur_ != end_) { + // trailing garbage + rv.set_error(); + } + return rv; + } + + private: + void skip_ws() { + while (cur_ != end_ && std::isspace(static_cast(*cur_))) + ++cur_; + } + + void parse_value(rvalue& out) { + if (cur_ == end_) { + out.set_error(); + return; + } + + switch (*cur_) { + case 'n': parse_null(out); break; + case 't': parse_true(out); break; + case 'f': parse_false(out); break; + case '"': parse_string(out); break; + case '[': parse_array(out); break; + case '{': parse_object(out); break; + default: + if (*cur_ == '-' || std::isdigit(static_cast(*cur_))) { + parse_number(out); + } else { + out.set_error(); + } + break; + } + } + + void parse_null(rvalue& out) { + if (consume_literal("null")) { + out.set_null(); + out.error_ = false; + } else { + out.set_error(); + } + } + + void parse_true(rvalue& out) { + if (consume_literal("true")) { + out.set_bool(true); + out.error_ = false; + } else { + out.set_error(); + } + } + + void parse_false(rvalue& out) { + if (consume_literal("false")) { + out.set_bool(false); + out.error_ = false; + } else { + out.set_error(); + } + } + + bool consume_literal(const char* lit) { + const char* p = cur_; + while (*lit && p != end_ && *p == *lit) { + ++p; ++lit; + } + if (*lit == '\0') { + cur_ = p; + return true; + } + return false; + } + + void parse_string(rvalue& out) { + if (cur_ == end_ || *cur_ != '"') { + out.set_error(); + return; + } + ++cur_; // skip opening quote + + std::string result; + while (cur_ != end_) { + char c = *cur_++; + if (c == '"') { + out.set_string(std::move(result)); + out.error_ = false; + return; + } + if (c == '\\') { + if (cur_ == end_) { out.set_error(); return; } + char esc = *cur_++; + switch (esc) { + case '"': result.push_back('"'); break; + case '\\': result.push_back('\\'); break; + case '/': result.push_back('/'); break; + case 'b': result.push_back('\b'); break; + case 'f': result.push_back('\f'); break; + case 'n': result.push_back('\n'); break; + case 'r': result.push_back('\r'); break; + case 't': result.push_back('\t'); break; + case 'u': + // minimal \uXXXX handling: skip 4 hex digits, no actual UTF-16 decode + if (end_ - cur_ < 4) { out.set_error(); return; } + for (int i = 0; i < 4; ++i) { + if (!std::isxdigit(static_cast(cur_[i]))) { + out.set_error(); + return; + } + } + // Just store as-is or replace with '?' + result.push_back('?'); + cur_ += 4; + break; + default: + out.set_error(); + return; + } + } else { + result.push_back(c); + } + } + // Unterminated string + out.set_error(); + } + + void parse_number(rvalue& out) { + const char* start = cur_; + + if (*cur_ == '-') ++cur_; + if (cur_ == end_) { out.set_error(); return; } + + if (*cur_ == '0') { + ++cur_; + } else if (std::isdigit(static_cast(*cur_))) { + while (cur_ != end_ && std::isdigit(static_cast(*cur_))) + ++cur_; + } else { + out.set_error(); + return; + } + + bool is_float = false; + if (cur_ != end_ && *cur_ == '.') { + is_float = true; + ++cur_; + if (cur_ == end_ || !std::isdigit(static_cast(*cur_))) { + out.set_error(); + return; + } + while (cur_ != end_ && std::isdigit(static_cast(*cur_))) + ++cur_; + } + + if (cur_ != end_ && (*cur_ == 'e' || *cur_ == 'E')) { + is_float = true; + ++cur_; + if (cur_ != end_ && (*cur_ == '+' || *cur_ == '-')) + ++cur_; + if (cur_ == end_ || !std::isdigit(static_cast(*cur_))) { + out.set_error(); + return; + } + while (cur_ != end_ && std::isdigit(static_cast(*cur_))) + ++cur_; + } + + std::string num_str(start, cur_); + char* endptr = nullptr; + + if (is_float) { + double v = std::strtod(num_str.c_str(), &endptr); + if (endptr != num_str.c_str() + num_str.size()) { + out.set_error(); + return; + } + out.set_number_double(v); + } else { + bool negative = (num_str[0] == '-'); + if (negative) { + long long v = std::strtoll(num_str.c_str(), &endptr, 10); + if (endptr != num_str.c_str() + num_str.size()) { + out.set_error(); + return; + } + out.set_number_signed(static_cast(v)); + } else { + unsigned long long v = std::strtoull(num_str.c_str(), &endptr, 10); + if (endptr != num_str.c_str() + num_str.size()) { + out.set_error(); + return; + } + out.set_number_unsigned(static_cast(v)); + } + } + out.error_ = false; + } + + void parse_array(rvalue& out) { + if (*cur_ != '[') { out.set_error(); return; } + ++cur_; + skip_ws(); + + rvalue::list elems; + if (cur_ != end_ && *cur_ == ']') { + ++cur_; + out.set_list(std::move(elems)); + out.error_ = false; + return; + } + + while (true) { + rvalue elem; + skip_ws(); + parse_value(elem); + if (!elem) { out.set_error(); return; } + elems.push_back(std::move(elem)); + + skip_ws(); + if (cur_ == end_) { out.set_error(); return; } + if (*cur_ == ',') { + ++cur_; + skip_ws(); + continue; + } + if (*cur_ == ']') { + ++cur_; + break; + } + out.set_error(); + return; + } + + out.set_list(std::move(elems)); + out.error_ = false; + } + + void parse_object(rvalue& out) { + if (*cur_ != '{') { out.set_error(); return; } + ++cur_; + skip_ws(); + + rvalue::object obj; + if (cur_ != end_ && *cur_ == '}') { + ++cur_; + out.set_object(std::move(obj)); + out.error_ = false; + return; + } + + while (true) { + skip_ws(); + rvalue key_rv; + parse_string(key_rv); + if (!key_rv || key_rv.t() != type::String) { out.set_error(); return; } + std::string key = key_rv.as_string(); + + skip_ws(); + if (cur_ == end_ || *cur_ != ':') { out.set_error(); return; } + ++cur_; + + skip_ws(); + rvalue value_rv; + parse_value(value_rv); + if (!value_rv) { out.set_error(); return; } + + obj.emplace(std::move(key), std::move(value_rv)); + + skip_ws(); + if (cur_ == end_) { out.set_error(); return; } + if (*cur_ == ',') { + ++cur_; + skip_ws(); + continue; + } + if (*cur_ == '}') { + ++cur_; + break; + } + out.set_error(); + return; + } + + out.set_object(std::move(obj)); + out.error_ = false; + } + + const char* cur_; + const char* end_; +}; + +inline rvalue load(const char* data, std::size_t size) { + parser p(data, size); + return p.parse(); +} + +inline rvalue load(const char* data) { + std::size_t len = 0; + while (data[len] != '\0') ++len; + return load(data, len); +} + +inline rvalue load(const std::string& s) { + return load(s.data(), s.size()); +} + +} // namespace json +} // namespace crow diff --git a/flutter/cpp/datasets/ifeval_utils/types.h b/flutter/cpp/datasets/ifeval_utils/types.h index b17e7f8ef..b2ddf124f 100644 --- a/flutter/cpp/datasets/ifeval_utils/types.h +++ b/flutter/cpp/datasets/ifeval_utils/types.h @@ -9,6 +9,8 @@ #include #include "flutter/cpp/datasets/ifeval_utils/common.h" +#include "flutter/cpp/datasets/ifeval_utils/json.h" +#include "compact_lang_det.h" namespace mlperf { namespace mobile { @@ -234,40 +236,11 @@ class JsonFormat : public Instruction { constexpr InstructionGroup Group() override { return DETECTABLE_FORMAT; } private: - // TODO possibly use a C++ json validator instead virtual bool verify_(const std::string& resp) const override { std::string t = resp; if (t.empty()) return false; - if (!((t.front() == '{' && t.back() == '}') || - (t.front() == '[' && t.back() == ']'))) - return false; - int brace = 0, bracket = 0; - bool in_str = false, esc = false; - for (char c : t) { - if (esc) { - esc = false; - continue; - } - if (c == '\\') { - esc = true; - continue; - } - if (c == '"') { - in_str = !in_str; - continue; - } - if (in_str) continue; - if (c == '{') - ++brace; - else if (c == '}') - --brace; - else if (c == '[') - ++bracket; - else if (c == ']') - --bracket; - if (brace < 0 || bracket < 0) return false; - } - return brace == 0 && bracket == 0 && !in_str; + crow::json::rvalue jv = crow::json::load(t); + return jv.is_valid(); } }; @@ -534,60 +507,9 @@ class ResponseLanguage : public Instruction { inline bool LanguageHeuristic(const std::string& text, const std::string& lang) const { - std::string L = tolower(lang); - const std::string& t = text; - - auto non_ascii_ratio = [&]() { - size_t non_ascii = 0, total = 0; - for (unsigned char c : t) { - if (std::isalpha(c)) { - ++total; - if (c >= 128) ++non_ascii; - } - } - return total == 0 ? 0.0 : (double)non_ascii / (double)total; - }; - - if (L == "en") { - return non_ascii_ratio() < 0.05; - } - if (L == "tr") { - return t.find("ğ") != std::string::npos || - t.find("Ğ") != std::string::npos || - t.find("ş") != std::string::npos || - t.find("Ş") != std::string::npos || - t.find("ı") != std::string::npos || - t.find("İ") != std::string::npos || - t.find("ö") != std::string::npos || - t.find("Ö") != std::string::npos || - t.find("ç") != std::string::npos || - t.find("Ç") != std::string::npos || - t.find("ü") != std::string::npos || - t.find("Ü") != std::string::npos; - } - if (L == "es") { - return t.find("ñ") != std::string::npos || - t.find("Ñ") != std::string::npos || - t.find("á") != std::string::npos || - t.find("é") != std::string::npos || - t.find("í") != std::string::npos || - t.find("ó") != std::string::npos || - t.find("ú") != std::string::npos; - } - if (L == "fr") { - return t.find("é") != std::string::npos || - t.find("è") != std::string::npos || - t.find("ê") != std::string::npos || - t.find("ç") != std::string::npos || - t.find("à") != std::string::npos; - } - if (L == "de") { - return t.find("ä") != std::string::npos || - t.find("ö") != std::string::npos || - t.find("ü") != std::string::npos || - t.find("ß") != std::string::npos; - } - return non_ascii_ratio() > 0.05; + bool is_reliable = true; + std::string detected_lang(CLD2::LanguageCode(CLD2::DetectLanguage(text.c_str(), text.size(), true, &is_reliable))); + return detected_lang == lang; } virtual bool verify_(const std::string& resp) const override { diff --git a/third_party/cld2.BUILD b/third_party/cld2.BUILD new file mode 100644 index 000000000..b5e34c542 --- /dev/null +++ b/third_party/cld2.BUILD @@ -0,0 +1,40 @@ +cc_library( + name = "cld2", + srcs = [ + "internal/cldutil.cc", + "internal/cldutil_shared.cc", + "internal/compact_lang_det.cc", + "internal/compact_lang_det_hint_code.cc", + "internal/compact_lang_det_impl.cc", + "internal/debug.cc", + "internal/fixunicodevalue.cc", + "internal/generated_entities.cc", + "internal/generated_language.cc", + "internal/generated_ulscript.cc", + "internal/getonescriptspan.cc", + "internal/lang_script.cc", + "internal/offsetmap.cc", + "internal/scoreonescriptspan.cc", + "internal/tote.cc", + "internal/utf8statetable.cc", + "internal/cld_generated_cjk_uni_prop_80.cc", + "internal/cld2_generated_cjk_compatible.cc", + "internal/cld_generated_cjk_delta_bi_4.cc", + "internal/generated_distinct_bi_0.cc", + "internal/cld2_generated_quadchrome_2.cc", + "internal/cld2_generated_deltaoctachrome.cc", + "internal/cld2_generated_distinctoctachrome.cc", + "internal/cld_generated_score_quad_octa_2.cc", + ], + hdrs = glob([ + "internal/*.h", + "internal/*.hh", + ]), + includes = [ + "public", + ], + copts = [ + "-std=c++11", + ], + visibility = ["//visibility:public"], +) From 4727d691aab45602049043b7e41e4962fda4880e Mon Sep 17 00:00:00 2001 From: Farook Al-Sammarraie Date: Tue, 18 Nov 2025 01:50:07 +0300 Subject: [PATCH 2/2] formatting --- flutter/cpp/datasets/ifeval_utils/BUILD | 4 +- flutter/cpp/datasets/ifeval_utils/json.h | 225 +++++++++++++--------- flutter/cpp/datasets/ifeval_utils/types.h | 5 +- 3 files changed, 142 insertions(+), 92 deletions(-) diff --git a/flutter/cpp/datasets/ifeval_utils/BUILD b/flutter/cpp/datasets/ifeval_utils/BUILD index 934da1687..c13cf74f2 100644 --- a/flutter/cpp/datasets/ifeval_utils/BUILD +++ b/flutter/cpp/datasets/ifeval_utils/BUILD @@ -22,8 +22,8 @@ cc_library( name = "ifeval_utils", hdrs = [ "common.h", - "types.h", "json.h", + "types.h", ], copts = select({ "//flutter/android/commonlibs:use_asan": [ @@ -35,6 +35,6 @@ cc_library( "//conditions:default": [], }), deps = [ - "@cld2//:cld2", + "@cld2", ], ) diff --git a/flutter/cpp/datasets/ifeval_utils/json.h b/flutter/cpp/datasets/ifeval_utils/json.h index 9c45aedea..03a7c7eeb 100644 --- a/flutter/cpp/datasets/ifeval_utils/json.h +++ b/flutter/cpp/datasets/ifeval_utils/json.h @@ -30,59 +30,48 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/ #pragma once -#include -#include -#include #include #include #include +#include +#include +#include namespace crow { namespace json { -enum class type { - Null, - False, - True, - Number, - String, - List, - Object -}; +enum class type { Null, False, True, Number, String, List, Object }; -enum class num_type { - Null, - Signed_integer, - Unsigned_integer, - Floating_point -}; +enum class num_type { Null, Signed_integer, Unsigned_integer, Floating_point }; class rvalue { public: - using list = std::vector; + using list = std::vector; using object = std::map; - rvalue() : t_(type::Null), nt_(num_type::Null), error_(true) {} // default = invalid + rvalue() + : t_(type::Null), + nt_(num_type::Null), + error_(true) {} // default = invalid explicit operator bool() const noexcept { return !error_; } - type t() const noexcept { return t_; } - num_type nt() const noexcept { return nt_; } + type t() const noexcept { return t_; } + num_type nt() const noexcept { return nt_; } - // Accessors (no bounds checking, for brevity) - const list& as_list() const { return list_; } + // Accessors (no bounds checking, for brevity) + const list& as_list() const { return list_; } const object& as_object() const { return object_; } const std::string& as_string() const { return str_; } - int64_t as_i() const { return i_; } - uint64_t as_u() const { return u_; } - double as_d() const { return d_; } + int64_t as_i() const { return i_; } + uint64_t as_u() const { return u_; } + double as_d() const { return d_; } - // Convenience index operators (no error checking) + // Convenience index operators (no error checking) const rvalue& operator[](std::size_t idx) const { static rvalue invalid; - if (t_ != type::List || idx >= list_.size()) - return invalid; + if (t_ != type::List || idx >= list_.size()) return invalid; return list_[idx]; } @@ -112,58 +101,58 @@ class rvalue { } void set_number_signed(int64_t v) noexcept { - t_ = type::Number; + t_ = type::Number; nt_ = num_type::Signed_integer; - i_ = v; - u_ = static_cast(v); - d_ = static_cast(v); + i_ = v; + u_ = static_cast(v); + d_ = static_cast(v); } void set_number_unsigned(uint64_t v) noexcept { - t_ = type::Number; + t_ = type::Number; nt_ = num_type::Unsigned_integer; - u_ = v; - i_ = static_cast(v); - d_ = static_cast(v); + u_ = v; + i_ = static_cast(v); + d_ = static_cast(v); } void set_number_double(double v) noexcept { - t_ = type::Number; + t_ = type::Number; nt_ = num_type::Floating_point; - d_ = v; - i_ = static_cast(v); - u_ = static_cast(v); + d_ = v; + i_ = static_cast(v); + u_ = static_cast(v); } void set_string(std::string v) { - t_ = type::String; + t_ = type::String; nt_ = num_type::Null; str_ = std::move(v); } void set_list(list v) { - t_ = type::List; + t_ = type::List; nt_ = num_type::Null; list_ = std::move(v); } void set_object(object v) { - t_ = type::Object; + t_ = type::Object; nt_ = num_type::Null; object_ = std::move(v); } - type t_; + type t_; num_type nt_; - bool error_ = false; + bool error_ = false; - // basic storage - int64_t i_ = 0; - uint64_t u_ = 0; - double d_ = 0.0; + // basic storage + int64_t i_ = 0; + uint64_t u_ = 0; + double d_ = 0.0; std::string str_; - list list_; - object object_; + list list_; + object object_; }; class parser { @@ -198,12 +187,24 @@ class parser { } switch (*cur_) { - case 'n': parse_null(out); break; - case 't': parse_true(out); break; - case 'f': parse_false(out); break; - case '"': parse_string(out); break; - case '[': parse_array(out); break; - case '{': parse_object(out); break; + case 'n': + parse_null(out); + break; + case 't': + parse_true(out); + break; + case 'f': + parse_false(out); + break; + case '"': + parse_string(out); + break; + case '[': + parse_array(out); + break; + case '{': + parse_object(out); + break; default: if (*cur_ == '-' || std::isdigit(static_cast(*cur_))) { parse_number(out); @@ -244,7 +245,8 @@ class parser { bool consume_literal(const char* lit) { const char* p = cur_; while (*lit && p != end_ && *p == *lit) { - ++p; ++lit; + ++p; + ++lit; } if (*lit == '\0') { cur_ = p; @@ -258,7 +260,7 @@ class parser { out.set_error(); return; } - ++cur_; // skip opening quote + ++cur_; // skip opening quote std::string result; while (cur_ != end_) { @@ -269,20 +271,43 @@ class parser { return; } if (c == '\\') { - if (cur_ == end_) { out.set_error(); return; } + if (cur_ == end_) { + out.set_error(); + return; + } char esc = *cur_++; switch (esc) { - case '"': result.push_back('"'); break; - case '\\': result.push_back('\\'); break; - case '/': result.push_back('/'); break; - case 'b': result.push_back('\b'); break; - case 'f': result.push_back('\f'); break; - case 'n': result.push_back('\n'); break; - case 'r': result.push_back('\r'); break; - case 't': result.push_back('\t'); break; + case '"': + result.push_back('"'); + break; + case '\\': + result.push_back('\\'); + break; + case '/': + result.push_back('/'); + break; + case 'b': + result.push_back('\b'); + break; + case 'f': + result.push_back('\f'); + break; + case 'n': + result.push_back('\n'); + break; + case 'r': + result.push_back('\r'); + break; + case 't': + result.push_back('\t'); + break; case 'u': - // minimal \uXXXX handling: skip 4 hex digits, no actual UTF-16 decode - if (end_ - cur_ < 4) { out.set_error(); return; } + // minimal \uXXXX handling: skip 4 hex digits, no actual UTF-16 + // decode + if (end_ - cur_ < 4) { + out.set_error(); + return; + } for (int i = 0; i < 4; ++i) { if (!std::isxdigit(static_cast(cur_[i]))) { out.set_error(); @@ -309,7 +334,10 @@ class parser { const char* start = cur_; if (*cur_ == '-') ++cur_; - if (cur_ == end_) { out.set_error(); return; } + if (cur_ == end_) { + out.set_error(); + return; + } if (*cur_ == '0') { ++cur_; @@ -336,8 +364,7 @@ class parser { if (cur_ != end_ && (*cur_ == 'e' || *cur_ == 'E')) { is_float = true; ++cur_; - if (cur_ != end_ && (*cur_ == '+' || *cur_ == '-')) - ++cur_; + if (cur_ != end_ && (*cur_ == '+' || *cur_ == '-')) ++cur_; if (cur_ == end_ || !std::isdigit(static_cast(*cur_))) { out.set_error(); return; @@ -378,7 +405,10 @@ class parser { } void parse_array(rvalue& out) { - if (*cur_ != '[') { out.set_error(); return; } + if (*cur_ != '[') { + out.set_error(); + return; + } ++cur_; skip_ws(); @@ -394,11 +424,17 @@ class parser { rvalue elem; skip_ws(); parse_value(elem); - if (!elem) { out.set_error(); return; } + if (!elem) { + out.set_error(); + return; + } elems.push_back(std::move(elem)); skip_ws(); - if (cur_ == end_) { out.set_error(); return; } + if (cur_ == end_) { + out.set_error(); + return; + } if (*cur_ == ',') { ++cur_; skip_ws(); @@ -417,7 +453,10 @@ class parser { } void parse_object(rvalue& out) { - if (*cur_ != '{') { out.set_error(); return; } + if (*cur_ != '{') { + out.set_error(); + return; + } ++cur_; skip_ws(); @@ -433,22 +472,34 @@ class parser { skip_ws(); rvalue key_rv; parse_string(key_rv); - if (!key_rv || key_rv.t() != type::String) { out.set_error(); return; } + if (!key_rv || key_rv.t() != type::String) { + out.set_error(); + return; + } std::string key = key_rv.as_string(); skip_ws(); - if (cur_ == end_ || *cur_ != ':') { out.set_error(); return; } + if (cur_ == end_ || *cur_ != ':') { + out.set_error(); + return; + } ++cur_; skip_ws(); rvalue value_rv; parse_value(value_rv); - if (!value_rv) { out.set_error(); return; } + if (!value_rv) { + out.set_error(); + return; + } obj.emplace(std::move(key), std::move(value_rv)); skip_ws(); - if (cur_ == end_) { out.set_error(); return; } + if (cur_ == end_) { + out.set_error(); + return; + } if (*cur_ == ',') { ++cur_; skip_ws(); @@ -481,9 +532,7 @@ inline rvalue load(const char* data) { return load(data, len); } -inline rvalue load(const std::string& s) { - return load(s.data(), s.size()); -} +inline rvalue load(const std::string& s) { return load(s.data(), s.size()); } -} // namespace json -} // namespace crow +} // namespace json +} // namespace crow diff --git a/flutter/cpp/datasets/ifeval_utils/types.h b/flutter/cpp/datasets/ifeval_utils/types.h index b2ddf124f..5f7f020c7 100644 --- a/flutter/cpp/datasets/ifeval_utils/types.h +++ b/flutter/cpp/datasets/ifeval_utils/types.h @@ -8,9 +8,9 @@ #include #include +#include "compact_lang_det.h" #include "flutter/cpp/datasets/ifeval_utils/common.h" #include "flutter/cpp/datasets/ifeval_utils/json.h" -#include "compact_lang_det.h" namespace mlperf { namespace mobile { @@ -508,7 +508,8 @@ class ResponseLanguage : public Instruction { inline bool LanguageHeuristic(const std::string& text, const std::string& lang) const { bool is_reliable = true; - std::string detected_lang(CLD2::LanguageCode(CLD2::DetectLanguage(text.c_str(), text.size(), true, &is_reliable))); + std::string detected_lang(CLD2::LanguageCode( + CLD2::DetectLanguage(text.c_str(), text.size(), true, &is_reliable))); return detected_lang == lang; }