diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..52d4858 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,223 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +SRE Agent is an AI-powered Site Reliability Engineering assistant that automates debugging, monitors application/infrastructure logs, diagnoses issues, and reports diagnostics. It integrates with Kubernetes clusters, GitHub repositories, and Slack for comprehensive incident response automation. + +## Architecture + +### Microservices Design +The system uses a microservices architecture with the following components: + +- **Orchestrator (Client)**: FastAPI-based MCP client (`sre_agent/client/`) that coordinates all services and handles incoming diagnostic requests +- **LLM Server**: Text generation service (`sre_agent/llm/`) supporting multiple AI providers (Anthropic, OpenAI, Gemini, Ollama) +- **Llama Firewall**: Security layer (`sre_agent/firewall/`) using Meta's Llama Prompt Guard for content validation +- **MCP Servers**: + - Kubernetes MCP (`sre_agent/servers/mcp-server-kubernetes/`) - TypeScript/Node.js K8s operations + - GitHub MCP (`sre_agent/servers/github/`) - TypeScript/Node.js repository operations + - Slack MCP (`sre_agent/servers/slack/`) - TypeScript/Node.js team notifications + - Prompt Server MCP (`sre_agent/servers/prompt_server/`) - Python structured prompts + +### Key Technologies +- **Languages**: Python 3.12+ (core services), TypeScript/Node.js (MCP servers) +- **Communication**: Model Context Protocol (MCP) with Server-Sent Events (SSE) transport +- **Infrastructure**: Docker Compose, AWS EKS deployment, GCP GKE deployment +- **AI/ML**: Multiple LLM providers, Hugging Face transformers + +### LLM Provider Support +- **Anthropic**: Claude models (API key required) +- **Google Gemini**: Gemini models (API key required) +- **Ollama**: Local LLM inference (no API key, privacy-focused) +- **OpenAI**: Placeholder (not yet implemented) +- **Self-hosted**: Placeholder (not yet implemented) + +## Common Development Commands + +### Project Setup +```bash +make project-setup # Install uv, create venv, install pre-commit hooks +``` + +### Code Quality +```bash +make check # Run linting, pre-commit hooks, and lock file check +make tests # Run pytest with coverage +make license-check # Verify dependency licences +``` + +### Service Management +```bash +# Local development - AWS +docker compose -f compose.aws.yaml up --build + +# Local development - GCP +docker compose -f compose.gcp.yaml up --build + +# Production with ECR images +docker compose -f compose.ecr.yaml up + +# Production with GAR images (Google) +docker compose -f compose.gar.yaml up + +# Test environment +docker compose -f compose.tests.yaml up +``` + +### Testing +```bash +# All tests +make tests + +# Specific test file +uv run python -m pytest tests/unit_tests/test_adapters.py + +# Specific test function +uv run python -m pytest tests/unit_tests/test_adapters.py::test_specific_function + +# With coverage +uv run python -m pytest --cov --cov-config=pyproject.toml --cov-report=xml + +# Security tests only +uv run python -m pytest tests/security_tests/ +``` + +## Configuration + +### Environment Variables Required +- `DEV_BEARER_TOKEN`: API authentication for the orchestrator +- `ANTHROPIC_API_KEY`: Claude API access (for Anthropic models) +- `GEMINI_API_KEY`: Google Gemini API access (for Gemini models) +- `OLLAMA_API_URL`: Ollama API endpoint (for 
local LLM inference, default: http://localhost:11434) +- `GITHUB_PERSONAL_ACCESS_TOKEN`: GitHub integration +- `SLACK_BOT_TOKEN`, `SLACK_TEAM_ID`, `CHANNEL_ID`: Slack notifications +- `AWS_REGION`, `TARGET_EKS_CLUSTER_NAME`: AWS EKS cluster access +- `GCP_PROJECT_ID`, `TARGET_GKE_CLUSTER_NAME`, `GKE_ZONE`: GCP GKE cluster access +- `HF_TOKEN`: Hugging Face model access + +### Cloud Platform Setup +- **AWS**: Credentials must be available at `~/.aws/credentials` for EKS cluster access +- **GCP**: Use `gcloud auth login` and `gcloud config set project YOUR_PROJECT_ID` for GKE access + +### Ollama Setup (Local LLM) +- **Install**: Visit [ollama.ai](https://ollama.ai) and follow installation instructions +- **Start**: Run `ollama serve` in your terminal +- **Models**: Download models like `ollama pull llama3.1` +- **Benefits**: Privacy-focused, no API costs, offline capable + +### Credential Setup Script +Use the interactive setup script for easy configuration: +```bash +python setup_credentials.py +# or with platform selection +python setup_credentials.py --platform aws +python setup_credentials.py --platform gcp +``` + +## Service Architecture Details + +### Communication Flow +1. Orchestrator receives `/diagnose` requests on port 8003 +2. Requests pass through Llama Firewall for security validation +3. LLM Server processes AI reasoning (using Anthropic, Gemini, or Ollama) +4. MCP servers handle tool operations (K8s, GitHub, Slack) +5. Results reported back via Slack notifications + +### Health Checks +All services implement health monitoring accessible via `/health` endpoints. + +## Development Patterns + +### MCP Integration +All external tool interactions use the Model Context Protocol standard. When adding new tools: +- Follow existing MCP server patterns in `sre_agent/servers/` +- Implement SSE transport for real-time communication +- Add health check endpoints + +### Security Considerations +- All requests pass through Llama Firewall validation +- Bearer token authentication required for API access +- Input validation at multiple service layers +- No secrets in code - use environment variables + +**IMPORTANT: Never commit the .env file!** +- The `.env` file contains sensitive credentials (API keys, tokens, secrets) +- It is included in `.gitignore` and should never be committed to the repository +- Use `python setup_credentials.py` to generate the `.env` file locally +- Each developer/environment needs their own `.env` file with appropriate credentials +- For production deployments, use proper secret management (AWS Secrets Manager, K8s secrets, etc.) + +### Code Style +- **Language**: Use British English spelling throughout (e.g., "specialised", "organised", "recognised") +- **Python**: Uses ruff, black, mypy for formatting and type checking +- **TypeScript**: Standard TypeScript/Node.js conventions +- **Line length**: 88 characters +- **Docstrings**: Google-style docstrings for Python +- **Type checking**: Strict type checking enabled + +### British English Spelling Guidelines +The project uses British English spelling. 
Common differences from American English: +- **-ise/-ize**: Use "-ise" endings (e.g., "organise", "recognise", "specialise") +- **-our/-or**: Use "-our" endings (e.g., "colour", "honour", "behaviour") +- **-re/-er**: Use "-re" endings (e.g., "centre", "metre", "theatre") +- **-ence/-ense**: Use "-ence" endings (e.g., "defence", "licence" as noun) +- **-yse/-yze**: Use "-yse" endings (e.g., "analyse", "paralyse") + +**Examples in SRE context:** +- "optimise" (not "optimize") +- "customise" (not "customize") +- "analyse logs" (not "analyze logs") +- "centralised monitoring" (not "centralized monitoring") +- "behaviour analysis" (not "behavior analysis") + +## Workspace Structure +This is a uv workspace with members: +- `sre_agent/llm`: LLM service with multi-provider support +- `sre_agent/client`: FastAPI orchestrator service +- `sre_agent/servers/prompt_server`: Python MCP server for structured prompts +- `sre_agent/firewall`: Llama Prompt Guard security layer +- `sre_agent/shared`: Shared utilities and schemas + +Each Python service has its own `pyproject.toml`. TypeScript MCP servers use `package.json`: +- `sre_agent/servers/mcp-server-kubernetes/`: Kubernetes operations (Node.js/TypeScript) +- `sre_agent/servers/github/`: GitHub API integration (Node.js/TypeScript) +- `sre_agent/servers/slack/`: Slack notifications (Node.js/TypeScript) + +## API Usage + +### Primary Endpoint +```bash +POST http://localhost:8003/diagnose +Authorization: Bearer +Content-Type: application/json +{"text": ""} +``` + +### Health Check +```bash +GET http://localhost:8003/health +``` + +## Deployment +- **Local**: Docker Compose with local builds (AWS: `compose.aws.yaml`, GCP: `compose.gcp.yaml`) +- **Production AWS**: ECR-based images on AWS EKS (`compose.ecr.yaml`) +- **Production GCP**: GAR-based images on GCP GKE (`compose.gar.yaml`) +- See [EKS Deployment](https://github.com/fuzzylabs/sre-agent-deployment) for cloud deployment examples + +## TypeScript MCP Server Development +For TypeScript MCP servers in `sre_agent/servers/`: + +### Building and Testing +```bash +# Kubernetes MCP server +cd sre_agent/servers/mcp-server-kubernetes +npm run build # Build TypeScript +npm run test # Run vitest tests +npm run dev # Watch mode + +# GitHub/Slack MCP servers +cd sre_agent/servers/github # or /slack +npm run build +npm run watch # Watch mode +``` \ No newline at end of file diff --git a/README.md b/README.md index 793d861..e7acceb 100644 --- a/README.md +++ b/README.md @@ -33,19 +33,51 @@ We've been writing blogs and sharing our learnings along the way. Check out our The SRE Agent supports multiple the following LLM providers: ### Anthropic -- **Models**: e.g. "claude-4-0-sonnet-latest" +- **Models**: e.g. "claude-3-5-sonnet-latest" - **Setup**: Requires `ANTHROPIC_API_KEY` ### Google Gemini -- **Models**: e.g, "gemini-2.5-flash" +- **Models**: e.g. "gemini-2.5-flash" - **Setup**: Requires `GEMINI_API_KEY` +### Ollama (Local) +- **Models**: e.g. "llama3.1", "mistral", "codellama" +- **Setup**: Install Ollama locally, no API key needed +- **Benefits**: Privacy, no API costs, offline capable + +
+<details>
+<summary>🦙 Ollama Setup Guide</summary>
+
+### Installing Ollama
+1. **Install Ollama**: Visit [ollama.ai](https://ollama.ai) and follow installation instructions
+2. **Start Ollama**: Run `ollama serve` in your terminal
+3. **Pull a model**: Download a model like `ollama pull llama3.1`
+
+### Recommended Models for SRE Tasks
+- **llama3.1** (8B): Fast, good general reasoning
+- **mistral** (7B): Excellent for technical tasks
+- **codellama** (7B): Specialised for code analysis
+- **llama3.1:70b**: Most capable but requires more resources
+
+### Configuration
+Set these in your `.env` file:
+```bash
+PROVIDER=ollama
+MODEL=llama3.1
+OLLAMA_API_URL=http://localhost:11434 # default
+```
+
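+### Verifying the Setup
+To confirm Ollama is reachable before starting the agent (this assumes the default endpoint configured above), you can, for example, run:
+```bash
+ollama list                              # the model you pulled (e.g. llama3.1) should be listed
+curl -s http://localhost:11434/api/tags  # Ollama's model-listing endpoint; should return JSON
+```
+
+</details>
+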
+ ## 🛠️ Prerequisites - [Docker](https://docs.docker.com/get-docker/) - A `.env` file in your project root ([see below](#getting-started)) -- An app deployed on AWS EKS (Elastic Kubernetes Service) or GCP GKE (Google Kubernetes Engine) +- A Kubernetes cluster: + - **Cloud**: AWS EKS, GCP GKE + - **Local**: minikube, Docker Desktop, kind, k3s +- For Ollama: Local installation ([see Ollama Setup Guide](#ollama-setup-guide)) ## ⚡ Getting Started diff --git a/setup_credentials.py b/setup_credentials.py index 879068b..90a7573 100644 --- a/setup_credentials.py +++ b/setup_credentials.py @@ -82,13 +82,20 @@ def get_credential_config(platform: str) -> dict[str, dict[str, Any]]: "prompt": "Enter your Github project root directory: ", "mask_value": False, }, - "PROVIDER": {"prompt": "Enter your LLM provider name: ", "mask_value": False}, + "PROVIDER": { + "prompt": "Enter your LLM provider name (anthropic/gemini/ollama): ", + "mask_value": False, + }, "MODEL": {"prompt": "Enter your LLM model name: ", "mask_value": False}, "GEMINI_API_KEY": {"prompt": "Enter your Gemini API Key: ", "mask_value": True}, "ANTHROPIC_API_KEY": { "prompt": "Enter your Anthropic API Key: ", "mask_value": True, }, + "OLLAMA_API_URL": { + "prompt": "Enter your Ollama API URL (default: http://localhost:11434): ", + "mask_value": False, + }, "MAX_TOKENS": { "prompt": "Controls the maximum number of tokens the LLM can generate in " "its response e.g. 10000: ", diff --git a/sre_agent/llm/main.py b/sre_agent/llm/main.py index 4aa707d..3e50cd8 100644 --- a/sre_agent/llm/main.py +++ b/sre_agent/llm/main.py @@ -13,6 +13,7 @@ BaseClient, DummyClient, GeminiClient, + OllamaClient, OpenAIClient, SelfHostedClient, ) @@ -32,6 +33,7 @@ Provider.MOCK: DummyClient(), Provider.OPENAI: OpenAIClient(), Provider.GEMINI: GeminiClient(), + Provider.OLLAMA: OllamaClient(), Provider.SELF_HOSTED: SelfHostedClient(), } diff --git a/sre_agent/llm/utils/clients.py b/sre_agent/llm/utils/clients.py index 05d3e9a..0371a95 100644 --- a/sre_agent/llm/utils/clients.py +++ b/sre_agent/llm/utils/clients.py @@ -4,6 +4,7 @@ from abc import ABC, abstractmethod from typing import Any, cast +import requests from anthropic import Anthropic from anthropic.types import MessageParam as AnthropicMessageBlock from anthropic.types import ToolParam @@ -213,20 +214,142 @@ def generate(self, payload: TextGenerationPayload) -> Message: model=response.model_version, content=content, role="assistant", - stop_reason=response.candidates[0].finish_reason - if response.candidates - else "end_turn", - usage=Usage( - input_tokens=response.usage_metadata.prompt_token_count, - output_tokens=response.usage_metadata.candidates_token_count, - cache_creation_input_tokens=None, - cache_read_input_tokens=response.usage_metadata.cached_content_token_count, - ) - if response.usage_metadata - else None, + stop_reason=( + response.candidates[0].finish_reason + if response.candidates + else "end_turn" + ), + usage=( + Usage( + input_tokens=response.usage_metadata.prompt_token_count, + output_tokens=response.usage_metadata.candidates_token_count, + cache_creation_input_tokens=None, + cache_read_input_tokens=response.usage_metadata.cached_content_token_count, + ) + if response.usage_metadata + else None + ), ) +class OllamaClient(BaseClient): + """A client for performing text generation using Ollama.""" + + def __init__(self, settings: LLMSettings = LLMSettings()) -> None: + """The constructor for the Ollama client.""" + super().__init__(settings) + self.api_url = settings.ollama_api_url + + def 
generate(self, payload: TextGenerationPayload) -> Message: + """A method for generating text using the Ollama API.""" + try: + # Convert the payload to Ollama format + messages = self._convert_messages_to_ollama(payload.messages) + + # Prepare the request data + request_data = { + "model": self.settings.model, + "messages": messages, + "stream": False, + "options": {}, + } + + # Add max_tokens if specified + if self.settings.max_tokens: + request_data["options"]["num_predict"] = self.settings.max_tokens + + # Add tools if present + if payload.tools: + request_data["tools"] = self._convert_tools_to_ollama(payload.tools) + + logger.debug(f"Ollama request: {request_data}") + + # Make the request to Ollama + response = requests.post( + f"{self.api_url}/api/chat", + json=request_data, + timeout=120, + headers={"Content-Type": "application/json"}, + ) + response.raise_for_status() + + ollama_response = response.json() + logger.debug(f"Ollama response: {ollama_response}") + + # Convert response back to our format + content: Content = [ + TextBlock( + text=ollama_response.get("message", {}).get("content", ""), + type="text", + ) + ] + + # Extract usage information if available + usage = None + if "usage" in ollama_response: + usage_data = ollama_response["usage"] + usage = Usage( + input_tokens=usage_data.get("prompt_tokens", 0), + output_tokens=usage_data.get("completion_tokens", 0), + cache_creation_input_tokens=None, + cache_read_input_tokens=None, + ) + + input_tokens = usage.input_tokens if usage else "N/A" + output_tokens = usage.output_tokens if usage else "N/A" + logger.info( + f"Ollama token usage - Input: {input_tokens}, Output: {output_tokens}" + ) + + return Message( + id=f"ollama_{hash(str(ollama_response))}", + model=self.settings.model, + content=content, + role="assistant", + stop_reason="end_turn", + usage=usage, + ) + + except requests.RequestException as e: + logger.error(f"Failed to connect to Ollama: {e}") + raise ValueError(f"Ollama API error: {e}") + except Exception as e: + logger.error(f"Unexpected error in Ollama client: {e}") + raise + + def _convert_messages_to_ollama(self, messages: list[Any]) -> list[dict[str, Any]]: + """Convert messages to Ollama format.""" + ollama_messages = [] + + for message in messages: + role = message.get("role", "user") + content = message.get("content", "") + + # Handle different content types + if isinstance(content, list): + # Extract text from content blocks + text_parts = [] + for block in content: + if isinstance(block, dict) and block.get("type") == "text": + text_parts.append(block.get("text", "")) + content = "\n".join(text_parts) + + ollama_messages.append({"role": role, "content": str(content)}) + + return ollama_messages + + def _convert_tools_to_ollama(self, tools: list[Any]) -> list[dict[str, Any]]: + """Convert MCP tools to Ollama format.""" + ollama_tools = [] + + for tool in tools: + # Convert MCP tool format to Ollama function calling format + if isinstance(tool, dict) and "function" in tool: + ollama_tools.append({"type": "function", "function": tool["function"]}) + + return ollama_tools + + class SelfHostedClient(BaseClient): """A client for performing text generation using a self-hosted model.""" diff --git a/sre_agent/llm/utils/schemas.py b/sre_agent/llm/utils/schemas.py index d8fccac..a512bbd 100644 --- a/sre_agent/llm/utils/schemas.py +++ b/sre_agent/llm/utils/schemas.py @@ -12,6 +12,7 @@ class Provider(StrEnum): ANTHROPIC = "anthropic" OPENAI = "openai" GEMINI = "gemini" + OLLAMA = "ollama" SELF_HOSTED = 
"self-hosted" MOCK = "mock" @@ -29,3 +30,7 @@ class LLMSettings(BaseSettings): max_tokens: int | None = Field( description="The maximum number of tokens for generation.", default=10000 ) + ollama_api_url: str = Field( + description="The Ollama API URL for local LLM inference.", + default="http://localhost:11434", + )