From 69cb8b94cfdf4e9cb7a17b4b057045406223e58b Mon Sep 17 00:00:00 2001 From: Farook Al-Sammarraie Date: Mon, 17 Nov 2025 04:19:17 +0300 Subject: [PATCH 1/3] added input token limit to pipeline backend config --- mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc | 9 +++++++++ mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h | 5 +++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc index 33150efeb..24cf6e4b1 100644 --- a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc +++ b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc @@ -258,6 +258,15 @@ mlperf_status_t LLMPipeline::backend_set_input(mlperf_backend_ptr_t backend_ptr, backend_data->tensors.get_tensors(backend_data->prefill_runner, backend_data->decode_runner); + if (effective_prefill_token_size + 1 > backend_data->max_input_tokens) { + LOG(ERROR) << "Input size (" + << std::to_string(effective_prefill_token_size + 1) + << ") exceeds configured input limit (" + << std::to_string(backend_data->max_input_tokens) << ")." + << std::endl; + return MLPERF_FAILURE; + } + if (effective_prefill_token_size + 1 > backend_data->tensors.kv_cache_k_0()->dims->data[1]) { LOG(ERROR) << "Input size (" diff --git a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h index f7552cd4d..62d7fad58 100644 --- a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h @@ -151,8 +151,9 @@ struct LLMBackendData { kv_cache_t kv_cache; std::vector prompt_tokens; std::vector output_tokens; - uint8_t threads = 2; - int max_output_tokens = 1024; + uint8_t threads = 8; + int max_output_tokens = 128; + int max_input_tokens = 2048; std::unordered_set stop_token_ids{128001, 128008, 128009}; LLMBackendData() {} From 7875f2a6b3bcf8070d01a76b0a38cfbbe2df5bfd Mon Sep 17 00:00:00 2001 From: Farook Al-Sammarraie Date: Mon, 24 Nov 2025 07:28:25 +0300 Subject: [PATCH 2/3] changed input limit to be part of the dataset --- flutter/cpp/datasets/ifeval.cc | 6 ++++++ flutter/cpp/datasets/ifeval.h | 1 + flutter/cpp/datasets/mmlu_gen.cc | 6 ++++++ flutter/cpp/datasets/mmlu_gen.h | 1 + mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc | 9 --------- mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h | 1 - 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/flutter/cpp/datasets/ifeval.cc b/flutter/cpp/datasets/ifeval.cc index 07834ebbd..15d205740 100644 --- a/flutter/cpp/datasets/ifeval.cc +++ b/flutter/cpp/datasets/ifeval.cc @@ -32,6 +32,12 @@ IFEval::IFEval(Backend* backend, const std::string& input_tfrecord, std::vector input_tokens; sp_processor->Encode(input_formatted.c_str(), &input_tokens).ok(); + //input token sanity check + if (input_tokens.size() > input_token_limit_) { + LOG(WARNING) << "Input token limit exceeded for entry " << std::to_string(i) << ". Ignoring."; + continue; + } + auto sample = std::make_unique(); sample->key = key; sample->prompt = prompt; diff --git a/flutter/cpp/datasets/ifeval.h b/flutter/cpp/datasets/ifeval.h index 5ce4ea357..684bca3c0 100644 --- a/flutter/cpp/datasets/ifeval.h +++ b/flutter/cpp/datasets/ifeval.h @@ -81,6 +81,7 @@ class IFEval : public Dataset { std::unordered_set used_sample_ids_; std::set loaded_sample_ids_; std::unique_ptr sp_processor; + static constexpr int input_token_limit_ = 1024; static constexpr int token_limit_ = 1024; }; diff --git a/flutter/cpp/datasets/mmlu_gen.cc b/flutter/cpp/datasets/mmlu_gen.cc index 4b79efc44..acdde87ac 100644 --- a/flutter/cpp/datasets/mmlu_gen.cc +++ b/flutter/cpp/datasets/mmlu_gen.cc @@ -45,6 +45,12 @@ MmluGen::MmluGen(Backend* backend, const std::string& input_tfrecord, std::vector input_tokens; sp_processor->Encode(input.c_str(), &input_tokens).ok(); + //input token sanity check + if (input_tokens.size() > input_token_limit_) { + LOG(WARNING) << "Input token limit exceeded for entry " << std::to_string(i) << ". Ignoring."; + continue; + } + auto sample = std::make_unique(); sample->input = input; sample->input_tokens = input_tokens; diff --git a/flutter/cpp/datasets/mmlu_gen.h b/flutter/cpp/datasets/mmlu_gen.h index 18ecc4d0d..8bbb8f544 100644 --- a/flutter/cpp/datasets/mmlu_gen.h +++ b/flutter/cpp/datasets/mmlu_gen.h @@ -66,6 +66,7 @@ class MmluGen : public Dataset { std::unordered_set used_sample_ids_; std::set loaded_sample_ids_; std::unique_ptr sp_processor; + static constexpr int input_token_limit_ = 1024; static constexpr int token_limit_ = 4; }; diff --git a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc index 24cf6e4b1..33150efeb 100644 --- a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc +++ b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc @@ -258,15 +258,6 @@ mlperf_status_t LLMPipeline::backend_set_input(mlperf_backend_ptr_t backend_ptr, backend_data->tensors.get_tensors(backend_data->prefill_runner, backend_data->decode_runner); - if (effective_prefill_token_size + 1 > backend_data->max_input_tokens) { - LOG(ERROR) << "Input size (" - << std::to_string(effective_prefill_token_size + 1) - << ") exceeds configured input limit (" - << std::to_string(backend_data->max_input_tokens) << ")." - << std::endl; - return MLPERF_FAILURE; - } - if (effective_prefill_token_size + 1 > backend_data->tensors.kv_cache_k_0()->dims->data[1]) { LOG(ERROR) << "Input size (" diff --git a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h index 62d7fad58..31818ea75 100644 --- a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h +++ b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h @@ -153,7 +153,6 @@ struct LLMBackendData { std::vector output_tokens; uint8_t threads = 8; int max_output_tokens = 128; - int max_input_tokens = 2048; std::unordered_set stop_token_ids{128001, 128008, 128009}; LLMBackendData() {} From 3b39a34d2622a4c1e7d6abe3d0f9e365501c4ee9 Mon Sep 17 00:00:00 2001 From: Farook Al-Sammarraie Date: Tue, 25 Nov 2025 06:11:14 +0300 Subject: [PATCH 3/3] formatting --- flutter/cpp/datasets/ifeval.cc | 5 +++-- flutter/cpp/datasets/mmlu_gen.cc | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/flutter/cpp/datasets/ifeval.cc b/flutter/cpp/datasets/ifeval.cc index 15d205740..8588bdb6b 100644 --- a/flutter/cpp/datasets/ifeval.cc +++ b/flutter/cpp/datasets/ifeval.cc @@ -32,9 +32,10 @@ IFEval::IFEval(Backend* backend, const std::string& input_tfrecord, std::vector input_tokens; sp_processor->Encode(input_formatted.c_str(), &input_tokens).ok(); - //input token sanity check + // input token sanity check if (input_tokens.size() > input_token_limit_) { - LOG(WARNING) << "Input token limit exceeded for entry " << std::to_string(i) << ". Ignoring."; + LOG(WARNING) << "Input token limit exceeded for entry " + << std::to_string(i) << ". Ignoring."; continue; } diff --git a/flutter/cpp/datasets/mmlu_gen.cc b/flutter/cpp/datasets/mmlu_gen.cc index acdde87ac..15358af8b 100644 --- a/flutter/cpp/datasets/mmlu_gen.cc +++ b/flutter/cpp/datasets/mmlu_gen.cc @@ -45,9 +45,10 @@ MmluGen::MmluGen(Backend* backend, const std::string& input_tfrecord, std::vector input_tokens; sp_processor->Encode(input.c_str(), &input_tokens).ok(); - //input token sanity check + // input token sanity check if (input_tokens.size() > input_token_limit_) { - LOG(WARNING) << "Input token limit exceeded for entry " << std::to_string(i) << ". Ignoring."; + LOG(WARNING) << "Input token limit exceeded for entry " + << std::to_string(i) << ". Ignoring."; continue; }