From 69cb8b94cfdf4e9cb7a17b4b057045406223e58b Mon Sep 17 00:00:00 2001
From: Farook Al-Sammarraie <farook.a@scopicsoftware.com>
Date: Mon, 17 Nov 2025 04:19:17 +0300
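Subject: [PATCH 1/3] Add input token limit to pipeline backend config

Add a max_input_tokens field (default 2048) to LLMBackendData and return
MLPERF_FAILURE from llm_pipeline.cc when the prefill token size plus one
exceeds it.

Also change two existing LLMBackendData defaults: threads 2 -> 8 and
max_output_tokens 1024 -> 128.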

---
 mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc | 9 +++++++++
 mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h  | 5 +++--
 2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc
index 33150efeb..24cf6e4b1 100644
--- a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc
+++ b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc
@@ -258,6 +258,15 @@ mlperf_status_t LLMPipeline::backend_set_input(mlperf_backend_ptr_t backend_ptr,
   backend_data->tensors.get_tensors(backend_data->prefill_runner,
                                     backend_data->decode_runner);
 
+  if (effective_prefill_token_size + 1 > backend_data->max_input_tokens) {
+    LOG(ERROR) << "Input size ("
+               << std::to_string(effective_prefill_token_size + 1)
+               << ") exceeds configured input limit ("
+               << std::to_string(backend_data->max_input_tokens) << ")."
+               << std::endl;
+    return MLPERF_FAILURE;
+  }
+
   if (effective_prefill_token_size + 1 >
       backend_data->tensors.kv_cache_k_0()->dims->data[1]) {
     LOG(ERROR) << "Input size ("
diff --git a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h
index f7552cd4d..62d7fad58 100644
--- a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h
+++ b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h
@@ -151,8 +151,9 @@ struct LLMBackendData {
   kv_cache_t kv_cache;
   std::vector<int> prompt_tokens;
   std::vector<int> output_tokens;
-  uint8_t threads = 2;
-  int max_output_tokens = 1024;
+  uint8_t threads = 8;
+  int max_output_tokens = 128;
+  int max_input_tokens = 2048;
   std::unordered_set<int> stop_token_ids{128001, 128008, 128009};
 
   LLMBackendData() {}

From 7875f2a6b3bcf8070d01a76b0a38cfbbe2df5bfd Mon Sep 17 00:00:00 2001
From: Farook Al-Sammarraie <farook.a@scopicsoftware.com>
Date: Mon, 24 Nov 2025 07:28:25 +0300
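Subject: [PATCH 2/3] Move input token limit from backend to datasets

Remove the max_input_tokens field and its check from the TFLite LLM
pipeline. Instead, the IFEval and MmluGen datasets now skip, with a
warning, any entry whose tokenized input is longer than
input_token_limit_ (1024).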

---
 flutter/cpp/datasets/ifeval.cc                        | 6 ++++++
 flutter/cpp/datasets/ifeval.h                         | 1 +
 flutter/cpp/datasets/mmlu_gen.cc                      | 6 ++++++
 flutter/cpp/datasets/mmlu_gen.h                       | 1 +
 mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc | 9 ---------
 mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h  | 1 -
 6 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/flutter/cpp/datasets/ifeval.cc b/flutter/cpp/datasets/ifeval.cc
index 07834ebbd..15d205740 100644
--- a/flutter/cpp/datasets/ifeval.cc
+++ b/flutter/cpp/datasets/ifeval.cc
@@ -32,6 +32,12 @@ IFEval::IFEval(Backend* backend, const std::string& input_tfrecord,
     std::vector<int> input_tokens;
     sp_processor->Encode(input_formatted.c_str(), &input_tokens).ok();
 
+    //input token sanity check
+    if (input_tokens.size() > input_token_limit_) {
+      LOG(WARNING) << "Input token limit exceeded for entry " << std::to_string(i) << ". Ignoring.";
+      continue;
+    }
+
     auto sample = std::make_unique<ifeval::Sample>();
     sample->key = key;
     sample->prompt = prompt;
diff --git a/flutter/cpp/datasets/ifeval.h b/flutter/cpp/datasets/ifeval.h
index 5ce4ea357..684bca3c0 100644
--- a/flutter/cpp/datasets/ifeval.h
+++ b/flutter/cpp/datasets/ifeval.h
@@ -81,6 +81,7 @@ class IFEval : public Dataset {
   std::unordered_set<size_t> used_sample_ids_;
   std::set<int> loaded_sample_ids_;
   std::unique_ptr<sentencepiece::SentencePieceProcessor> sp_processor;
+  static constexpr int input_token_limit_ = 1024;
   static constexpr int token_limit_ = 1024;
 };
 
diff --git a/flutter/cpp/datasets/mmlu_gen.cc b/flutter/cpp/datasets/mmlu_gen.cc
index 4b79efc44..acdde87ac 100644
--- a/flutter/cpp/datasets/mmlu_gen.cc
+++ b/flutter/cpp/datasets/mmlu_gen.cc
@@ -45,6 +45,12 @@ MmluGen::MmluGen(Backend* backend, const std::string& input_tfrecord,
     std::vector<int> input_tokens;
     sp_processor->Encode(input.c_str(), &input_tokens).ok();
 
+    //input token sanity check
+    if (input_tokens.size() > input_token_limit_) {
+      LOG(WARNING) << "Input token limit exceeded for entry " << std::to_string(i) << ". Ignoring.";
+      continue;
+    }
+
     auto sample = std::make_unique<PromptSample>();
     sample->input = input;
     sample->input_tokens = input_tokens;
diff --git a/flutter/cpp/datasets/mmlu_gen.h b/flutter/cpp/datasets/mmlu_gen.h
index 18ecc4d0d..8bbb8f544 100644
--- a/flutter/cpp/datasets/mmlu_gen.h
+++ b/flutter/cpp/datasets/mmlu_gen.h
@@ -66,6 +66,7 @@ class MmluGen : public Dataset {
   std::unordered_set<size_t> used_sample_ids_;
   std::set<int> loaded_sample_ids_;
   std::unique_ptr<sentencepiece::SentencePieceProcessor> sp_processor;
+  static constexpr int input_token_limit_ = 1024;
   static constexpr int token_limit_ = 4;
 };
 
diff --git a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc
index 24cf6e4b1..33150efeb 100644
--- a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc
+++ b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.cc
@@ -258,15 +258,6 @@ mlperf_status_t LLMPipeline::backend_set_input(mlperf_backend_ptr_t backend_ptr,
   backend_data->tensors.get_tensors(backend_data->prefill_runner,
                                     backend_data->decode_runner);
 
-  if (effective_prefill_token_size + 1 > backend_data->max_input_tokens) {
-    LOG(ERROR) << "Input size ("
-               << std::to_string(effective_prefill_token_size + 1)
-               << ") exceeds configured input limit ("
-               << std::to_string(backend_data->max_input_tokens) << ")."
-               << std::endl;
-    return MLPERF_FAILURE;
-  }
-
   if (effective_prefill_token_size + 1 >
       backend_data->tensors.kv_cache_k_0()->dims->data[1]) {
     LOG(ERROR) << "Input size ("
diff --git a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h
index 62d7fad58..31818ea75 100644
--- a/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h
+++ b/mobile_back_tflite/cpp/backend_tflite/llm_pipeline.h
@@ -153,7 +153,6 @@ struct LLMBackendData {
   std::vector<int> output_tokens;
   uint8_t threads = 8;
   int max_output_tokens = 128;
-  int max_input_tokens = 2048;
   std::unordered_set<int> stop_token_ids{128001, 128008, 128009};
 
   LLMBackendData() {}

From 3b39a34d2622a4c1e7d6abe3d0f9e365501c4ee9 Mon Sep 17 00:00:00 2001
From: Farook Al-Sammarraie <farook.a@scopicsoftware.com>
Date: Tue, 25 Nov 2025 06:11:14 +0300
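Subject: [PATCH 3/3] Fix formatting of input token limit checks

Wrap the long LOG(WARNING) lines and add a space after `//` in the
ifeval.cc and mmlu_gen.cc input token checks. No functional change.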

---
 flutter/cpp/datasets/ifeval.cc   | 5 +++--
 flutter/cpp/datasets/mmlu_gen.cc | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/flutter/cpp/datasets/ifeval.cc b/flutter/cpp/datasets/ifeval.cc
index 15d205740..8588bdb6b 100644
--- a/flutter/cpp/datasets/ifeval.cc
+++ b/flutter/cpp/datasets/ifeval.cc
@@ -32,9 +32,10 @@ IFEval::IFEval(Backend* backend, const std::string& input_tfrecord,
     std::vector<int> input_tokens;
     sp_processor->Encode(input_formatted.c_str(), &input_tokens).ok();
 
-    //input token sanity check
+    // input token sanity check
     if (input_tokens.size() > input_token_limit_) {
-      LOG(WARNING) << "Input token limit exceeded for entry " << std::to_string(i) << ". Ignoring.";
+      LOG(WARNING) << "Input token limit exceeded for entry "
+                   << std::to_string(i) << ". Ignoring.";
       continue;
     }
 
diff --git a/flutter/cpp/datasets/mmlu_gen.cc b/flutter/cpp/datasets/mmlu_gen.cc
index acdde87ac..15358af8b 100644
--- a/flutter/cpp/datasets/mmlu_gen.cc
+++ b/flutter/cpp/datasets/mmlu_gen.cc
@@ -45,9 +45,10 @@ MmluGen::MmluGen(Backend* backend, const std::string& input_tfrecord,
     std::vector<int> input_tokens;
     sp_processor->Encode(input.c_str(), &input_tokens).ok();
 
-    //input token sanity check
+    // input token sanity check
     if (input_tokens.size() > input_token_limit_) {
-      LOG(WARNING) << "Input token limit exceeded for entry " << std::to_string(i) << ". Ignoring.";
+      LOG(WARNING) << "Input token limit exceeded for entry "
+                   << std::to_string(i) << ". Ignoring.";
       continue;
     }