diff --git a/apps/miroflow-agent/benchmarks/common_benchmark.py b/apps/miroflow-agent/benchmarks/common_benchmark.py index 4b26138a..5e042a6f 100644 --- a/apps/miroflow-agent/benchmarks/common_benchmark.py +++ b/apps/miroflow-agent/benchmarks/common_benchmark.py @@ -347,6 +347,9 @@ def extract_timestamp(file_path): while format_retry_count <= max_format_retries: try: + # Check if this is the final retry (no more chances after this) + is_final_retry = format_retry_count == max_format_retries + ( response, final_boxed_answer, @@ -362,6 +365,7 @@ def extract_timestamp(file_path): output_formatter=self.output_formatter, ground_truth=task.ground_truth, log_dir=str(self.get_log_dir()), + is_final_retry=is_final_retry, ) attempt_result["model_boxed_answer"] = ( diff --git a/apps/miroflow-agent/src/core/answer_generator.py b/apps/miroflow-agent/src/core/answer_generator.py index fb2a0c69..c0d14363 100644 --- a/apps/miroflow-agent/src/core/answer_generator.py +++ b/apps/miroflow-agent/src/core/answer_generator.py @@ -466,6 +466,7 @@ async def generate_and_finalize_answer( turn_count: int, task_description: str, reached_max_turns: bool = False, + is_final_retry: bool = False, save_callback=None, ) -> Tuple[str, str, Optional[str], str, List[Dict[str, Any]]]: """ @@ -499,9 +500,10 @@ async def generate_and_finalize_answer( failure_experience_summary = None usage_log = "" - # CASE: Context management ON + reached max turns + # CASE: Context management ON + reached max turns + NOT final retry # Skip answer generation entirely - any answer would be a blind guess - if context_management_enabled and reached_max_turns: + # But if this is the final retry, we still try to generate an answer (last chance) + if context_management_enabled and reached_max_turns and not is_final_retry: self.task_log.log_step( "info", "Main Agent | Final Answer (Context Management Mode)", @@ -524,6 +526,7 @@ async def generate_and_finalize_answer( ) # ALL OTHER CASES: Generate final answer first + # (including final retry with reached_max_turns - last chance to get an answer) ( final_answer_text, final_summary, @@ -541,14 +544,21 @@ async def generate_and_finalize_answer( if save_callback: save_callback(system_prompt, message_history) - # CASE: Context management OFF + # CASE: Context management OFF or final retry # Try to use intermediate answers as fallback to maximize accuracy - if not context_management_enabled: + # For final retry, there's no more retry opportunity, so we use fallback + if not context_management_enabled or is_final_retry: final_answer_text, final_summary, final_boxed_answer = ( self.handle_no_context_management_fallback( final_answer_text, final_summary, final_boxed_answer ) ) + if is_final_retry: + self.task_log.log_step( + "info", + "Main Agent | Final Answer (Final Retry)", + "This is the final retry. Using intermediate fallback if available.", + ) return ( final_summary, final_boxed_answer, @@ -557,7 +567,7 @@ async def generate_and_finalize_answer( message_history, ) - # CASE: Context management ON + normal completion (not reached max turns) + # CASE: Context management ON + normal completion (not reached max turns, not final retry) # Don't use fallback - wrong guess would reduce accuracy final_answer_text, final_summary, final_boxed_answer = ( self.handle_context_management_no_fallback( diff --git a/apps/miroflow-agent/src/core/orchestrator.py b/apps/miroflow-agent/src/core/orchestrator.py index cf84d534..e3c88efc 100644 --- a/apps/miroflow-agent/src/core/orchestrator.py +++ b/apps/miroflow-agent/src/core/orchestrator.py @@ -734,7 +734,11 @@ async def run_sub_agent( return final_answer_text async def run_main_agent( - self, task_description, task_file_name=None, task_id="default_task" + self, + task_description, + task_file_name=None, + task_id="default_task", + is_final_retry=False, ): """ Execute the main end-to-end task. @@ -1170,6 +1174,7 @@ async def run_main_agent( turn_count=turn_count, task_description=task_description, reached_max_turns=reached_max_turns, + is_final_retry=is_final_retry, save_callback=self._save_message_history, ) diff --git a/apps/miroflow-agent/src/core/pipeline.py b/apps/miroflow-agent/src/core/pipeline.py index 3e6f18f4..7a8c8967 100644 --- a/apps/miroflow-agent/src/core/pipeline.py +++ b/apps/miroflow-agent/src/core/pipeline.py @@ -45,6 +45,7 @@ async def execute_task_pipeline( stream_queue: Optional[Any] = None, tool_definitions: Optional[List[Dict[str, Any]]] = None, sub_agent_tool_definitions: Optional[Dict[str, List[Dict[str, Any]]]] = None, + is_final_retry: bool = False, ): """ Executes the full pipeline for a single task. @@ -118,6 +119,7 @@ async def execute_task_pipeline( task_description=task_description, task_file_name=task_file_name, task_id=task_id, + is_final_retry=is_final_retry, ) llm_client.close()