mrveiss · mrveiss · Mar 27, 2026 · Mar 27, 2026
@@ -8,22 +8,24 @@
 This simplified class delegates to specialized provider modules.
 """
 
+from __future__ import annotations
+
 import asyncio
 import logging
 import os
 import re
 import time
 import uuid
-from typing import Any, Dict, List, Optional
+from typing import Any
 
 import aiohttp
 import xxhash
+from config import ConfigManager
 from constants.model_constants import ModelConstants
 
 from autobot_shared.error_boundaries import error_boundary, get_error_boundary_manager
 from autobot_shared.http_client import get_http_client
 from autobot_shared.tracing import get_tracer
-from config import ConfigManager
 
 # Issue #1403: Adapter registry
 from .adapters.registry import get_adapter_registry
@@ -101,7 +103,7 @@ class LLMInterface:
     - Structured request/response handling
     """
 
-    def __init__(self, settings: Optional[LLMSettings] = None):
+    def __init__(self, settings: LLMSettings | None = None):
         """
         Initialize LLM interface with optional settings and configure providers.
 
@@ -192,13 +194,13 @@ def _init_async_components(self) -> None:
         Initialize async components including HTTP client and caching.
         """
         self._http_client = get_http_client()
-        self._models_cache: Optional[List[str]] = None
+        self._models_cache: list[str] | None = None
         self._models_cache_time: float = 0
         self._lock = asyncio.Lock()
         # Issue #551: L1/L2 dual-tier caching system
         self._response_cache = get_llm_cache()
 
-    def _init_metrics(self) -> Dict[str, Any]:
+    def _init_metrics(self) -> dict[str, Any]:
         """
         Issue #665: Extracted from __init__ to reduce function length.
 
@@ -423,7 +425,7 @@ def _apply_tiered_routing(
         lightweight models and complex requests to capable models.
 
         Args:
-            messages: List of message dicts to analyze
+            messages: list of message dicts to analyze
             provider: Current provider name
             current_model: Currently selected model name
 
@@ -460,7 +462,7 @@ def _apply_tiered_routing(
 
     async def switch_provider(
         self, provider: str, model: str = "", validate: bool = False
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
         """Switch the active LLM provider at runtime (#536).
 
         Args:
@@ -491,7 +493,7 @@ async def switch_provider(
         logger.info("Switched LLM provider to %s (model=%s)", provider, model)
         return {"success": True, "provider": provider, "model": model}
 
-    async def get_all_provider_status(self) -> Dict[str, Any]:
+    async def get_all_provider_status(self) -> dict[str, Any]:
         """Get status of all configured providers (#536).
 
         Returns:
@@ -635,7 +637,7 @@ def base_url(self) -> str:
         """Get Ollama base URL."""
         return f"http://{self.settings.ollama_host}:{self.settings.ollama_port}"
 
-    async def _generate_cache_key(self, messages: List[ChatMessage], **params) -> str:
+    async def _generate_cache_key(self, messages: list[ChatMessage], **params) -> str:
         """Generate cache key with high-performance hashing."""
         key_data = (
             tuple((m.role, m.content) for m in messages),
@@ -753,7 +755,7 @@ async def _check_cache(
         request_id: str,
         start_time: float,
         **kwargs,
-    ) -> tuple[Optional[LLMResponse], Optional[str]]:
+    ) -> tuple[LLMResponse | None, str | None]:
         """
         Check L1/L2 cache for existing response.
 
@@ -812,7 +814,7 @@ async def _prepare_request_context(
         Prepare request context with provider, model, and optimizations. Issue #620.
 
         Args:
-            messages: List of message dicts
+            messages: list of message dicts
             llm_type: Type of LLM
             **kwargs: Additional parameters
 
@@ -834,17 +836,17 @@ async def _finalize_response(
         messages: list,
         model_name: str,
         provider: str,
-        cache_key: Optional[str],
+        cache_key: str | None,
         request_id: str,
         start_time: float,
-        session_id: Optional[str],
+        session_id: str | None,
     ) -> LLMResponse:
         """
         Finalize response with metrics, caching, and usage tracking. Issue #620.
 
         Args:
             response: LLM response object
-            messages: List of message dicts
+            messages: list of message dicts
             model_name: Model name used
             provider: Provider name
             cache_key: Cache key if applicable
@@ -907,7 +909,7 @@ async def _execute_chat_request(
         Execute chat request with caching and fallback. Issue #620.
 
         Args:
-            messages: List of message dicts
+            messages: list of message dicts
             llm_type: Type of LLM
             request_id: Request identifier
             start_time: Request start time
@@ -956,7 +958,7 @@ async def chat_completion(
         Issue #665: Refactored to use helper methods for reduced complexity.
 
         Args:
-            messages: List of message dicts
+            messages: list of message dicts
             llm_type: Type of LLM ("orchestrator", "task", "chat", etc.)
             **kwargs: Additional parameters (provider, model_name, etc.)
 
@@ -1010,7 +1012,7 @@ def _mark_fallback_response(
             )
 
     def _build_all_providers_failed_response(
-        self, request_id: str, last_error: Optional[str]
+        self, request_id: str, last_error: str | None
     ) -> LLMResponse:
         """Build error response when all providers fail. Issue #620."""
         logger.error(f"All providers failed. Last error: {last_error}")
@@ -1204,7 +1206,7 @@ async def _track_llm_usage(
         model: str,
         response: LLMResponse,
         processing_time: float,
-        session_id: Optional[str] = None,
+        session_id: str | None = None,
     ):
         """Track LLM usage for cost optimization analysis (Issue #229)."""
         if not PATTERN_ANALYZER_AVAILABLE:
@@ -1269,7 +1271,7 @@ async def _handle_local_request(self, request: LLMRequest) -> LLMResponse:
         return await self._local_handler.chat_completion(request)
 
     # Utility methods
-    async def get_available_models(self, provider: str = "ollama") -> List[str]:
+    async def get_available_models(self, provider: str = "ollama") -> list[str]:
         """Get available models for a provider."""
         if provider == "ollama":
             ollama_host = os.getenv("AUTOBOT_OLLAMA_HOST")
@@ -1292,7 +1294,7 @@ async def get_available_models(self, provider: str = "ollama") -> List[str]:
 
         return []
 
-    def get_metrics(self) -> Dict[str, Any]:
+    def get_metrics(self) -> dict[str, Any]:
         """Get performance metrics including cache and optimization statistics."""
         metrics = self._metrics.copy()
         # Issue #551: Include L1/L2 cache metrics
@@ -1311,7 +1313,7 @@ def get_metrics(self) -> Dict[str, Any]:
             metrics["tiered_routing"] = self._tier_router.get_metrics()
         return metrics
 
-    def get_cache_metrics(self) -> Dict[str, Any]:
+    def get_cache_metrics(self) -> dict[str, Any]:
         """
         Get detailed cache performance metrics.
 
@@ -1322,7 +1324,7 @@ def get_cache_metrics(self) -> Dict[str, Any]:
         """
         return self._response_cache.get_metrics()
 
-    async def clear_cache(self, l1: bool = True, l2: bool = True) -> Dict[str, int]:
+    async def clear_cache(self, l1: bool = True, l2: bool = True) -> dict[str, int]:
         """
         Clear LLM response cache.
 
@@ -1356,11 +1358,11 @@ async def chat_completion_optimized(
         agent_type: str,
         user_message: str,
         session_id: str,
-        user_name: Optional[str] = None,
-        user_role: Optional[str] = None,
-        available_tools: Optional[list] = None,
-        recent_context: Optional[str] = None,
-        additional_params: Optional[dict] = None,
+        user_name: str | None = None,
+        user_role: str | None = None,
+        available_tools: list | None = None,
+        recent_context: str | None = None,
+        additional_params: dict | None = None,
         **llm_params,
     ) -> LLMResponse:
         """Chat completion with vLLM-optimized prompts. Issue #620."""

@@ -6,10 +6,12 @@
 Provides efficient aiohttp client session management to prevent resource exhaustion
 """
 
+from __future__ import annotations
+
 import asyncio
 import logging
 import time
-from typing import Any, Dict, Optional
+from typing import Any
 
 import aiohttp
 from aiohttp import ClientSession, ClientTimeout, TCPConnector
@@ -24,8 +26,8 @@ class HTTPClientManager:
     Prevents creating new ClientSession for each request which causes resource exhaustion.
     """
 
-    _instance: Optional["HTTPClientManager"] = None
-    _session: Optional[ClientSession] = None
+    _instance: HTTPClientManager | None = None
+    _session: ClientSession | None = None
     _lock = asyncio.Lock()
 
     def __new__(cls):
@@ -288,7 +290,7 @@ async def post(self, url: str, **kwargs) -> aiohttp.ClientResponse:
         """Convenience method for POST requests."""
         return await self.request("POST", url, **kwargs)
 
-    async def get_json(self, url: str, **kwargs) -> Dict[str, Any]:
+    async def get_json(self, url: str, **kwargs) -> dict[str, Any]:
         """
         Make a GET request and return JSON response.
 
@@ -304,8 +306,8 @@ async def get_json(self, url: str, **kwargs) -> Dict[str, Any]:
             return await response.json()
 
     async def post_json(
-        self, url: str, json_data: Dict[str, Any], **kwargs
-    ) -> Dict[str, Any]:
+        self, url: str, json_data: dict[str, Any], **kwargs
+    ) -> dict[str, Any]:
         """
         Make a POST request with JSON data and return JSON response.
 
@@ -336,7 +338,7 @@ async def close(self):
             self._connector = None
             self._closed = True
 
-    def get_stats(self) -> Dict[str, Any]:
+    def get_stats(self) -> dict[str, Any]:
         """Get client usage statistics."""
         utilization = (
             self._active_requests / self._current_pool_size
@@ -374,7 +376,7 @@ async def __aexit__(self, exc_type, exc_val, exc_tb):
 # Global singleton instance (thread-safe)
 import threading
 
-_http_client: Optional[HTTPClientManager] = None
+_http_client: HTTPClientManager | None = None
 _http_client_lock = threading.Lock()