Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 61 additions & 59 deletions autobot-backend/chat_workflow/tool_handler.py

Large diffs are not rendered by default.

139 changes: 71 additions & 68 deletions autobot-backend/code_analysis/src/env_analyzer.py

Large diffs are not rendered by default.

56 changes: 29 additions & 27 deletions autobot-backend/llm_interface_pkg/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,24 @@
This simplified class delegates to specialized provider modules.
"""

from __future__ import annotations

import asyncio
import logging
import os
import re
import time
import uuid
from typing import Any, Dict, List, Optional
from typing import Any

import aiohttp
import xxhash
from config import ConfigManager
from constants.model_constants import ModelConstants

from autobot_shared.error_boundaries import error_boundary, get_error_boundary_manager
from autobot_shared.http_client import get_http_client
from autobot_shared.tracing import get_tracer
from config import ConfigManager

# Issue #1403: Adapter registry
from .adapters.registry import get_adapter_registry
Expand Down Expand Up @@ -101,7 +103,7 @@ class LLMInterface:
- Structured request/response handling
"""

def __init__(self, settings: Optional[LLMSettings] = None):
def __init__(self, settings: LLMSettings | None = None):
"""
Initialize LLM interface with optional settings and configure providers.

Expand Down Expand Up @@ -192,13 +194,13 @@ def _init_async_components(self) -> None:
Initialize async components including HTTP client and caching.
"""
self._http_client = get_http_client()
self._models_cache: Optional[List[str]] = None
self._models_cache: list[str] | None = None
self._models_cache_time: float = 0
self._lock = asyncio.Lock()
# Issue #551: L1/L2 dual-tier caching system
self._response_cache = get_llm_cache()

def _init_metrics(self) -> Dict[str, Any]:
def _init_metrics(self) -> dict[str, Any]:
"""
Issue #665: Extracted from __init__ to reduce function length.

Expand Down Expand Up @@ -423,7 +425,7 @@ def _apply_tiered_routing(
lightweight models and complex requests to capable models.

Args:
messages: List of message dicts to analyze
messages: list of message dicts to analyze
provider: Current provider name
current_model: Currently selected model name

Expand Down Expand Up @@ -460,7 +462,7 @@ def _apply_tiered_routing(

async def switch_provider(
self, provider: str, model: str = "", validate: bool = False
) -> Dict[str, Any]:
) -> dict[str, Any]:
"""Switch the active LLM provider at runtime (#536).

Args:
Expand Down Expand Up @@ -491,7 +493,7 @@ async def switch_provider(
logger.info("Switched LLM provider to %s (model=%s)", provider, model)
return {"success": True, "provider": provider, "model": model}

async def get_all_provider_status(self) -> Dict[str, Any]:
async def get_all_provider_status(self) -> dict[str, Any]:
"""Get status of all configured providers (#536).

Returns:
Expand Down Expand Up @@ -635,7 +637,7 @@ def base_url(self) -> str:
"""Get Ollama base URL."""
return f"http://{self.settings.ollama_host}:{self.settings.ollama_port}"

async def _generate_cache_key(self, messages: List[ChatMessage], **params) -> str:
async def _generate_cache_key(self, messages: list[ChatMessage], **params) -> str:
"""Generate cache key with high-performance hashing."""
key_data = (
tuple((m.role, m.content) for m in messages),
Expand Down Expand Up @@ -753,7 +755,7 @@ async def _check_cache(
request_id: str,
start_time: float,
**kwargs,
) -> tuple[Optional[LLMResponse], Optional[str]]:
) -> tuple[LLMResponse | None, str | None]:
"""
Check L1/L2 cache for existing response.

Expand Down Expand Up @@ -812,7 +814,7 @@ async def _prepare_request_context(
Prepare request context with provider, model, and optimizations. Issue #620.

Args:
messages: List of message dicts
messages: list of message dicts
llm_type: Type of LLM
**kwargs: Additional parameters

Expand All @@ -834,17 +836,17 @@ async def _finalize_response(
messages: list,
model_name: str,
provider: str,
cache_key: Optional[str],
cache_key: str | None,
request_id: str,
start_time: float,
session_id: Optional[str],
session_id: str | None,
) -> LLMResponse:
"""
Finalize response with metrics, caching, and usage tracking. Issue #620.

Args:
response: LLM response object
messages: List of message dicts
messages: list of message dicts
model_name: Model name used
provider: Provider name
cache_key: Cache key if applicable
Expand Down Expand Up @@ -907,7 +909,7 @@ async def _execute_chat_request(
Execute chat request with caching and fallback. Issue #620.

Args:
messages: List of message dicts
messages: list of message dicts
llm_type: Type of LLM
request_id: Request identifier
start_time: Request start time
Expand Down Expand Up @@ -956,7 +958,7 @@ async def chat_completion(
Issue #665: Refactored to use helper methods for reduced complexity.

Args:
messages: List of message dicts
messages: list of message dicts
llm_type: Type of LLM ("orchestrator", "task", "chat", etc.)
**kwargs: Additional parameters (provider, model_name, etc.)

Expand Down Expand Up @@ -1010,7 +1012,7 @@ def _mark_fallback_response(
)

def _build_all_providers_failed_response(
self, request_id: str, last_error: Optional[str]
self, request_id: str, last_error: str | None
) -> LLMResponse:
"""Build error response when all providers fail. Issue #620."""
logger.error(f"All providers failed. Last error: {last_error}")
Expand Down Expand Up @@ -1204,7 +1206,7 @@ async def _track_llm_usage(
model: str,
response: LLMResponse,
processing_time: float,
session_id: Optional[str] = None,
session_id: str | None = None,
):
"""Track LLM usage for cost optimization analysis (Issue #229)."""
if not PATTERN_ANALYZER_AVAILABLE:
Expand Down Expand Up @@ -1269,7 +1271,7 @@ async def _handle_local_request(self, request: LLMRequest) -> LLMResponse:
return await self._local_handler.chat_completion(request)

# Utility methods
async def get_available_models(self, provider: str = "ollama") -> List[str]:
async def get_available_models(self, provider: str = "ollama") -> list[str]:
"""Get available models for a provider."""
if provider == "ollama":
ollama_host = os.getenv("AUTOBOT_OLLAMA_HOST")
Expand All @@ -1292,7 +1294,7 @@ async def get_available_models(self, provider: str = "ollama") -> List[str]:

return []

def get_metrics(self) -> Dict[str, Any]:
def get_metrics(self) -> dict[str, Any]:
"""Get performance metrics including cache and optimization statistics."""
metrics = self._metrics.copy()
# Issue #551: Include L1/L2 cache metrics
Expand All @@ -1311,7 +1313,7 @@ def get_metrics(self) -> Dict[str, Any]:
metrics["tiered_routing"] = self._tier_router.get_metrics()
return metrics

def get_cache_metrics(self) -> Dict[str, Any]:
def get_cache_metrics(self) -> dict[str, Any]:
"""
Get detailed cache performance metrics.

Expand All @@ -1322,7 +1324,7 @@ def get_cache_metrics(self) -> Dict[str, Any]:
"""
return self._response_cache.get_metrics()

async def clear_cache(self, l1: bool = True, l2: bool = True) -> Dict[str, int]:
async def clear_cache(self, l1: bool = True, l2: bool = True) -> dict[str, int]:
"""
Clear LLM response cache.

Expand Down Expand Up @@ -1356,11 +1358,11 @@ async def chat_completion_optimized(
agent_type: str,
user_message: str,
session_id: str,
user_name: Optional[str] = None,
user_role: Optional[str] = None,
available_tools: Optional[list] = None,
recent_context: Optional[str] = None,
additional_params: Optional[dict] = None,
user_name: str | None = None,
user_role: str | None = None,
available_tools: list | None = None,
recent_context: str | None = None,
additional_params: dict | None = None,
**llm_params,
) -> LLMResponse:
"""Chat completion with vLLM-optimized prompts. Issue #620."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
Provides efficient aiohttp client session management to prevent resource exhaustion
"""

from __future__ import annotations

import asyncio
import logging
import time
from typing import Any, Dict, Optional
from typing import Any

import aiohttp
from aiohttp import ClientSession, ClientTimeout, TCPConnector
Expand All @@ -24,8 +26,8 @@ class HTTPClientManager:
Prevents creating new ClientSession for each request which causes resource exhaustion.
"""

_instance: Optional["HTTPClientManager"] = None
_session: Optional[ClientSession] = None
_instance: HTTPClientManager | None = None
_session: ClientSession | None = None
_lock = asyncio.Lock()

def __new__(cls):
Expand Down Expand Up @@ -288,7 +290,7 @@ async def post(self, url: str, **kwargs) -> aiohttp.ClientResponse:
"""Convenience method for POST requests."""
return await self.request("POST", url, **kwargs)

async def get_json(self, url: str, **kwargs) -> Dict[str, Any]:
async def get_json(self, url: str, **kwargs) -> dict[str, Any]:
"""
Make a GET request and return JSON response.

Expand All @@ -304,8 +306,8 @@ async def get_json(self, url: str, **kwargs) -> Dict[str, Any]:
return await response.json()

async def post_json(
self, url: str, json_data: Dict[str, Any], **kwargs
) -> Dict[str, Any]:
self, url: str, json_data: dict[str, Any], **kwargs
) -> dict[str, Any]:
"""
Make a POST request with JSON data and return JSON response.

Expand Down Expand Up @@ -336,7 +338,7 @@ async def close(self):
self._connector = None
self._closed = True

def get_stats(self) -> Dict[str, Any]:
def get_stats(self) -> dict[str, Any]:
"""Get client usage statistics."""
utilization = (
self._active_requests / self._current_pool_size
Expand Down Expand Up @@ -374,7 +376,7 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
# Global singleton instance (thread-safe)
import threading

_http_client: Optional[HTTPClientManager] = None
_http_client: HTTPClientManager | None = None
_http_client_lock = threading.Lock()


Expand Down
Loading
Loading