diff --git a/.env.full b/.env.full new file mode 100644 index 0000000..15cde9e --- /dev/null +++ b/.env.full @@ -0,0 +1,71 @@ +# SRE Agent Full Configuration +# This file contains all possible configuration options for production use +# All features enabled - requires credentials for all services + +# ===== ESSENTIAL CREDENTIALS ===== +# Basic authentication token for API access +DEV_BEARER_TOKEN=your_dev_token_here + +# Hugging Face token for Llama Firewall (REQUIRED) +# Get from: https://huggingface.co/settings/tokens +# Needs read access to meta-llama/Llama-Prompt-Guard-2-86M +HF_TOKEN=your_hugging_face_token_here + +# LLM Provider Configuration +PROVIDER=anthropic # Options: anthropic, gemini, mock +MODEL=claude-3-5-sonnet-20241022 # Your preferred model + +# LLM API Keys (provide the one matching your PROVIDER) +ANTHROPIC_API_KEY=your_anthropic_api_key_here # Required if PROVIDER=anthropic +GEMINI_API_KEY=your_gemini_api_key_here # Required if PROVIDER=gemini + +# ===== SLACK INTEGRATION ===== +# For sending notifications and alerts +SLACK_BOT_TOKEN=your_slack_bot_token_here +SLACK_TEAM_ID=your_slack_team_id_here +SLACK_SIGNING_SECRET=your_slack_signing_secret_here +SLACK_CHANNEL_ID=your_slack_channel_id_here + +# ===== GITHUB INTEGRATION ===== +# For repository access and code analysis +GITHUB_PERSONAL_ACCESS_TOKEN=your_github_token_here +GITHUB_ORGANISATION=your_org_name +GITHUB_REPO_NAME=your_repo_name +PROJECT_ROOT=src # Root directory of your project + +# ===== KUBERNETES INTEGRATION ===== +# Choose AWS (EKS) OR GCP (GKE) - not both + +# AWS EKS Configuration +AWS_REGION=us-east-1 +AWS_ACCOUNT_ID=your_aws_account_id +TARGET_EKS_CLUSTER_NAME=your_eks_cluster_name + +# GCP GKE Configuration (alternative to AWS) +# CLOUDSDK_CORE_PROJECT=your_gcp_project_id +# CLOUDSDK_COMPUTE_REGION=us-central1 +# TARGET_GKE_CLUSTER_NAME=your_gke_cluster_name + +# ===== SERVICE CONFIGURATION ===== +# Services running on your cluster (customize for your setup) 
+SERVICES=["cartservice", "adservice", "emailservice", "frontend", "checkoutservice"] + +# Tools available to the agent +TOOLS=["list_pods", "get_logs", "get_file_contents", "slack_post_message", "create_github_issue"] + +# ===== PERFORMANCE SETTINGS ===== +# Maximum tokens the LLM can generate +MAX_TOKENS=10000 + +# Query timeout in seconds +QUERY_TIMEOUT=300 + +# ===== USAGE ===== +# 1. Replace all placeholder values with your actual credentials +# 2. Generate this file with: uv run python setup_credentials.py --mode full +# 3. Choose either AWS or GCP configuration (comment out the unused one) +# 4. Customize SERVICES and TOOLS for your environment +# 5. Start with: +# - AWS: docker compose -f compose.aws.yaml up +# - GCP: docker compose -f compose.gcp.yaml up +# 6. Access the API at http://localhost:8003 diff --git a/.env.minimal b/.env.minimal new file mode 100644 index 0000000..61dceb8 --- /dev/null +++ b/.env.minimal @@ -0,0 +1,59 @@ +# SRE Agent Minimal Configuration +# This file contains only the essential credentials needed for basic functionality +# Requires real API keys for LLM provider + +# ===== ESSENTIAL CREDENTIALS ===== +# Basic authentication token for API access +DEV_BEARER_TOKEN=your_dev_token_here + +# Hugging Face token for Llama Firewall (REQUIRED) +# Get from: https://huggingface.co/settings/tokens +# Needs read access to meta-llama/Llama-Prompt-Guard-2-86M +HF_TOKEN=your_hugging_face_token_here + +# LLM Provider Configuration +PROVIDER=anthropic # or "gemini" +MODEL=claude-3-5-sonnet-20241022 # or your preferred model + +# LLM API Keys (provide the one matching your PROVIDER) +ANTHROPIC_API_KEY=your_anthropic_api_key_here # Required if PROVIDER=anthropic +GEMINI_API_KEY=your_gemini_api_key_here # Required if PROVIDER=gemini + +# ===== DEFAULT CONFIGURATION ===== +# Service configuration (minimal defaults) +SERVICES=["cartservice", "adservice", "emailservice"] +TOOLS=["list_pods", "get_logs", "get_file_contents", "slack_post_message"] + +# 
GitHub repository defaults (for prompt server) +GITHUB_ORGANISATION=fuzzylabs +GITHUB_REPO_NAME=microservices-demo +PROJECT_ROOT=src + +# Timeouts and limits +MAX_TOKENS=10000 +QUERY_TIMEOUT=300 + +# ===== DISABLED FEATURES ===== +# These features are not configured in minimal mode +# Slack notifications - disabled +SLACK_BOT_TOKEN=null +SLACK_TEAM_ID=null +SLACK_SIGNING_SECRET=null +SLACK_CHANNEL_ID=null + +# GitHub integration - disabled +GITHUB_PERSONAL_ACCESS_TOKEN=null + +# Kubernetes integration - not configured +# You'll need to add these manually if you want K8s integration: +# AWS_REGION=us-east-1 +# TARGET_EKS_CLUSTER_NAME=your_cluster_name +# or +# CLOUDSDK_CORE_PROJECT=your_project_id +# TARGET_GKE_CLUSTER_NAME=your_cluster_name + +# ===== USAGE ===== +# 1. Replace placeholder values with your actual credentials +# 2. Generate this file with: uv run python setup_credentials.py --mode minimal +# 3. Start with: docker compose -f compose.aws.yaml up (or compose.gcp.yaml) +# 4. Some features (Slack, GitHub, K8s) will be disabled but core functionality works diff --git a/.env.testing b/.env.testing new file mode 100644 index 0000000..90f119e --- /dev/null +++ b/.env.testing @@ -0,0 +1,48 @@ +# SRE Agent Testing Configuration +# This file contains the minimal configuration needed for testing with mock services +# No real API keys required - everything uses mock implementations + +# ===== ESSENTIAL CREDENTIALS ===== +# Basic authentication token (can be any value for testing) +DEV_BEARER_TOKEN=dev-token-123 + +# Hugging Face token for Llama Firewall (REQUIRED - get from https://huggingface.co/settings/tokens) +# This is the only real API key needed for testing +HF_TOKEN=your_hugging_face_token_here + +# LLM Provider Configuration (mock = no real LLM calls) +PROVIDER=mock +MODEL=mock-model + +# Mock LLM credentials (not used but may be required by validation) +ANTHROPIC_API_KEY=null +GEMINI_API_KEY=null + +# ===== DEFAULT CONFIGURATION ===== +# GitHub 
repository defaults (for prompt server) +GITHUB_ORGANISATION=fuzzylabs +GITHUB_REPO_NAME=microservices-demo +PROJECT_ROOT=src + +# Service configuration defaults +SERVICES=["cartservice", "adservice", "emailservice"] +TOOLS=["list_pods", "get_logs", "get_file_contents", "slack_post_message"] + +# Timeouts and limits +MAX_TOKENS=10000 +QUERY_TIMEOUT=300 + +# Slack configuration (disabled for testing) +SLACK_BOT_TOKEN=null +SLACK_TEAM_ID=null +SLACK_SIGNING_SECRET=null +SLACK_CHANNEL_ID=null + +# GitHub integration (disabled for testing) +GITHUB_PERSONAL_ACCESS_TOKEN=null + +# ===== USAGE ===== +# 1. Set your HF_TOKEN above (the only real credential needed) +# 2. Generate this file with: uv run python setup_credentials.py --mode testing +# 3. Start the testing environment with: docker compose -f compose.tests.yaml up --build +# (Note: First build takes ~10-15 minutes, subsequent starts are much faster) diff --git a/.github/workflows/build-push-images.yml b/.github/workflows/build-push-images.yml index 2041fa7..3f77b0e 100644 --- a/.github/workflows/build-push-images.yml +++ b/.github/workflows/build-push-images.yml @@ -4,10 +4,13 @@ on: push: branches: - main + release: + types: [published] permissions: id-token: write contents: read + packages: write jobs: build-and-push: @@ -57,10 +60,32 @@ jobs: id: login-ecr uses: aws-actions/amazon-ecr-login@v2 + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: | + ${{ steps.login-ecr.outputs.registry }}/mcp/${{ matrix.name }} + ghcr.io/fuzzylabs/sre-agent-${{ matrix.name }} + tags: | + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,value=latest,enable={{is_default_branch}} + - name: Build and Push ${{ matrix.name }} uses: 
docker/build-push-action@v6 with: context: ${{ matrix.context }} file: ${{ matrix.dockerfile }} push: true - tags: ${{ steps.login-ecr.outputs.registry }}/mcp/${{ matrix.name }}:latest + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/CLAUDE.md b/CLAUDE.md index 61e5c8a..08f58d5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,6 +26,38 @@ The system uses a microservices architecture with the following components: - **Infrastructure**: Docker Compose, AWS EKS deployment - **AI/ML**: Multiple LLM providers, Hugging Face transformers +## Quick Start + +### 🚀 Fastest Way to Try SRE Agent (2-5 minutes) +```bash +# 1. Basic setup +make project-setup + +# 2. Quick setup with minimal credentials +uv run python setup_credentials.py --mode quick + +# 3. Start with public images (NO BUILD TIME!) +docker compose -f compose.ghcr.yaml up +``` + +### 🔧 Production Setup +```bash +# 1. Complete setup with all features +uv run python setup_credentials.py --mode full --platform aws # or gcp + +# 2. Start production environment +docker compose -f compose.aws.yaml up # or compose.gcp.yaml +``` + +### 🧪 Testing with Local Builds +```bash +# 1. Testing setup (only needs HF_TOKEN) +uv run python setup_credentials.py --mode testing + +# 2. 
Start testing environment (builds locally - ~10-15 minutes first time) +docker compose -f compose.tests.yaml up --build +``` + ## Common Development Commands ### Project Setup @@ -42,6 +74,9 @@ make license-check # Verify dependency licenses ### Service Management ```bash +# Quick start with public images (FASTEST - 2-5 minutes) +docker compose -f compose.ghcr.yaml up + # Local development - AWS docker compose -f compose.aws.yaml up --build @@ -54,7 +89,7 @@ docker compose -f compose.ecr.yaml up # Production with GAR images (Google) docker compose -f compose.gar.yaml up -# Test environment +# Test environment (local builds) docker compose -f compose.tests.yaml up ``` @@ -78,29 +113,63 @@ uv run python -m pytest tests/security_tests/ ## Configuration -### Environment Variables Required +### Environment Variables by Priority + +#### 🔴 Essential (Required for basic functionality) - `DEV_BEARER_TOKEN`: API authentication for the orchestrator -- `ANTHROPIC_API_KEY`: Claude API access (for Anthropic models) -- `GEMINI_API_KEY`: Google Gemini API access (for Gemini models) -- `GITHUB_PERSONAL_ACCESS_TOKEN`: GitHub integration -- `SLACK_BOT_TOKEN`, `SLACK_TEAM_ID`, `CHANNEL_ID`: Slack notifications -- `AWS_REGION`, `TARGET_EKS_CLUSTER_NAME`: AWS EKS cluster access -- `GCP_PROJECT_ID`, `TARGET_GKE_CLUSTER_NAME`, `GKE_ZONE`: GCP GKE cluster access -- `HF_TOKEN`: Hugging Face model access +- `HF_TOKEN`: Hugging Face token for Llama Firewall security validation +- `PROVIDER`: LLM provider (anthropic, gemini, or mock) +- `MODEL`: LLM model name +- At least one of: `ANTHROPIC_API_KEY` or `GEMINI_API_KEY` (unless using mock) + +#### 🟡 Feature-Specific (Required for specific integrations) +- **Slack Integration**: `SLACK_BOT_TOKEN`, `SLACK_TEAM_ID` +- **GitHub Integration**: `GITHUB_PERSONAL_ACCESS_TOKEN` +- **AWS EKS**: `AWS_REGION`, `TARGET_EKS_CLUSTER_NAME` +- **GCP GKE**: `CLOUDSDK_CORE_PROJECT`, `TARGET_GKE_CLUSTER_NAME` + +#### 🟢 Optional (Have sensible defaults) +- 
`MAX_TOKENS`: Token limit (default: 10000) +- `QUERY_TIMEOUT`: Request timeout (default: 300) +- `GITHUB_ORGANISATION`, `GITHUB_REPO_NAME`, `PROJECT_ROOT`: Repository metadata (defaults provided) +- `SERVICES`, `TOOLS`: Available services and tools (defaults provided) ### Cloud Platform Setup - **AWS**: Credentials must be available at `~/.aws/credentials` for EKS cluster access - **GCP**: Use `gcloud auth login` and `gcloud config set project YOUR_PROJECT_ID` for GKE access ### Credential Setup Script -Use the interactive setup script for easy configuration: +The interactive setup script supports different modes for easy configuration: + ```bash -python setup_credentials.py -# or with platform selection -python setup_credentials.py --platform aws -python setup_credentials.py --platform gcp +# Quick mode - public images, minimal setup (FASTEST - 2-5 minutes!) +uv run python setup_credentials.py --mode quick + +# Essential credentials only (basic functionality) +uv run python setup_credentials.py --mode minimal + +# Quick testing setup (mock LLM, minimal credentials) +uv run python setup_credentials.py --mode testing + +# Complete setup (all features) +uv run python setup_credentials.py --mode full + +# With platform selection +uv run python setup_credentials.py --mode full --platform aws +uv run python setup_credentials.py --mode full --platform gcp ``` +#### Setup Modes Explained: +- **Quick Mode**: Public images, minimal credentials - perfect for trying it out (2-5 minutes!) 
+- **Minimal Mode**: Essential credentials only - basic LLM functionality without integrations +- **Testing Mode**: Uses mock LLM provider, only requires HF_TOKEN for security validation +- **Full Mode**: All features enabled - Slack, GitHub, Kubernetes integrations + +#### Example .env Templates: +- Copy `.env.testing` for quick local testing +- Copy `.env.minimal` for basic functionality +- Copy `.env.full` for production with all features + ## Service Architecture Details ### Communication Flow @@ -170,9 +239,36 @@ GET http://localhost:8003/health ``` ## Deployment -- **Local**: Docker Compose with local builds (AWS: `compose.aws.yaml`, GCP: `compose.gcp.yaml`) -- **Production AWS**: ECR-based images on AWS EKS (`compose.ecr.yaml`) -- **Production GCP**: GAR-based images on GCP GKE (`compose.gar.yaml`) + +### Deployment Options (by speed) +1. **Public Images (Fastest - 2-5 minutes)**: Use `compose.ghcr.yaml` with pre-built GHCR images +2. **Private Registry (5-10 minutes)**: Use `compose.ecr.yaml` or `compose.gar.yaml` with private images +3. 
**Local Builds (20-30 minutes)**: Use `compose.aws.yaml` or `compose.gcp.yaml` with local builds + +### Build and Push Script +The enhanced `build_push_docker.sh` script supports multiple registries: + +```bash +# Build and push to different registries +./build_push_docker.sh --ghcr # Push to public GitHub Container Registry +./build_push_docker.sh --aws # Push to private AWS ECR +./build_push_docker.sh --gcp # Push to private GCP GAR +./build_push_docker.sh --dockerhub # Push to Docker Hub +./build_push_docker.sh --local # Build locally only (no push) +``` + +**Requirements for each registry:** +- **GHCR**: `GITHUB_TOKEN` environment variable +- **AWS**: `AWS_ACCOUNT_ID`, `AWS_REGION` in .env + AWS credentials configured +- **GCP**: `CLOUDSDK_*` variables in .env + `gcloud auth login` +- **Docker Hub**: `DOCKER_USERNAME`, `DOCKER_TOKEN` environment variables +- **Local**: No additional requirements + +### Deployment Methods +- **Quick Testing**: `compose.ghcr.yaml` (public images) +- **Local Development**: `compose.aws.yaml` or `compose.gcp.yaml` (local builds) +- **Production AWS**: `compose.ecr.yaml` (private ECR images) +- **Production GCP**: `compose.gar.yaml` (private GAR images) - See [EKS Deployment](https://github.com/fuzzylabs/sre-agent-deployment) for cloud deployment examples ## TypeScript MCP Server Development diff --git a/README.md b/README.md index 2dd0038..247f909 100644 --- a/README.md +++ b/README.md @@ -43,21 +43,62 @@ The SRE Agent supports multiple the following LLM providers: ## 🛠️ Prerequisites +### For Quick Start (fastest way to try it - 2-5 minutes!) 
- [Docker](https://docs.docker.com/get-docker/) -- A `.env` file in your project root ([see below](#getting-started)) +- A Hugging Face API token ([get one here](https://huggingface.co/settings/tokens)) +- LLM API key (Anthropic or Google Gemini) + +### For Production Mode +- [Docker](https://docs.docker.com/get-docker/) +- A `.env` file in your project root ([see setup below](#quick-start)) - An app deployed on AWS EKS (Elastic Kubernetes Service) or GCP GKE (Google Kubernetes Engine) +- LLM API key (Anthropic or Google Gemini) + +## ⚡ Quick Start + +### 🚀 **Fastest Way to Try SRE Agent (2-5 minutes)** +```bash +# 1. Quick setup with minimal credentials +uv run python setup_credentials.py --mode quick + +# 2. Start with pre-built public images (NO BUILD TIME!) +docker compose -f compose.ghcr.yaml up + +# 3. Test it works +curl -X POST http://localhost:8003/diagnose \ + -H "Authorization: Bearer dev-token-123" \ + -d '{"text": "cartservice"}' +``` + +### 🧪 **Testing Mode (No cloud setup required!)** +```bash +# 1. Quick testing setup (only needs HF_TOKEN) +uv run python setup_credentials.py --mode testing + +# 2. Start test environment (builds containers locally - takes ~10-15 minutes first time) +docker compose -f compose.tests.yaml up --build + +# 3. 
Test it works +curl -X POST http://localhost:8003/diagnose \ + -H "Authorization: Bearer dev-token-123" \ + -d '{"text": "cartservice"}' +``` -## ⚡ Quick Start (5 minutes) +### 🏭 **Production Mode** -### 1️⃣ Set up credentials +#### 1️⃣ Set up credentials ```bash -python setup_credentials.py --platform aws # or --platform gcp +# Complete setup with all features +uv run python setup_credentials.py --mode full --platform aws # or --platform gcp + +# OR minimal setup (essential credentials only) +uv run python setup_credentials.py --mode minimal --platform aws ``` -### 2️⃣ Configure cloud access +#### 2️⃣ Configure cloud access **AWS:** Add credentials to `~/.aws/credentials` | **GCP:** Run `gcloud auth login` -### 3️⃣ Deploy with pre-built images (fastest!) +#### 3️⃣ Deploy with pre-built images (fastest!) ```bash # AWS ECR (recommended) aws ecr get-login-password --region [YOUR_REGION] | docker login --username AWS --password-stdin $(aws sts get-caller-identity --query Account --output text).dkr.ecr.[YOUR_REGION].amazonaws.com @@ -68,7 +109,7 @@ gcloud auth configure-docker [YOUR_REGION]-docker.pkg.dev docker compose -f compose.gar.yaml up -d ``` -### 4️⃣ Test it works +#### 4️⃣ Test it works ```bash curl -X POST http://localhost:8003/diagnose \ -H "Authorization: Bearer $(grep DEV_BEARER_TOKEN .env | cut -d'=' -f2)" \ @@ -84,24 +125,39 @@ curl -X POST http://localhost:8003/diagnose \ ### Interactive Credential Setup -Use our interactive setup script to configure your credentials: +Use our interactive setup script with different modes: ```bash -python setup_credentials.py +# Quick mode - public images, minimal setup (FASTEST - 2-5 minutes!) 
+uv run python setup_credentials.py --mode quick + +# Minimal mode - essential credentials only +uv run python setup_credentials.py --mode minimal --platform aws + +# Testing mode - mock LLM, local builds +uv run python setup_credentials.py --mode testing + +# Full mode - all features enabled +uv run python setup_credentials.py --mode full --platform aws ``` +**Setup Modes:** +- 🚀 **Quick**: Public images, minimal credentials - perfect for trying it out (2-5 minutes!) +- ⚡ **Minimal**: Essential credentials only - basic LLM functionality +- 🧪 **Testing**: Mock LLM, only requires HF_TOKEN - local builds for testing +- 🏭 **Full**: Complete setup - Slack, GitHub, Kubernetes integrations + The script will: -- ✅ Auto-detect your platform (AWS/GCP) or let you choose -- ✅ Guide you through credential setup with helpful prompts +- ✅ Guide you through only the credentials you need for your chosen mode +- ✅ Provide sensible defaults where possible - ✅ Show current values and let you update them - ✅ Create your `.env` file automatically -**Quick start with platform selection:** -```bash -python setup_credentials.py --platform aws -# or -python setup_credentials.py --platform gcp -``` +**Quick setup templates:** +You can also copy and customise example files: +- Copy `.env.testing` for testing mode +- Copy `.env.minimal` for basic functionality +- Copy `.env.full` for production setup ### Manual Cloud Credential Setup @@ -129,11 +185,21 @@ gcloud config set project YOUR_PROJECT_ID ## 🚀 Deployment Options -### **Recommended: Pre-built Registry Images (2-5 minutes)** +### **Fastest: Public Registry Images (2-5 minutes)** + +Use pre-built public images for instant deployment: + +**GitHub Container Registry (Recommended):** +```bash +# No authentication needed - images are public! 
+docker compose -f compose.ghcr.yaml up -d +``` + +### **Production: Private Registry Images** -Use pre-built container images for the fastest deployment: +For production deployments with private registries: -**AWS ECR (Fastest):** +**AWS ECR:** ```bash # Authenticate with ECR aws ecr get-login-password --region [YOUR_REGION] | docker login --username AWS --password-stdin $(aws sts get-caller-identity --query Account --output text).dkr.ecr.[YOUR_REGION].amazonaws.com @@ -151,7 +217,29 @@ gcloud auth configure-docker [YOUR_REGION]-docker.pkg.dev docker compose -f compose.gar.yaml up -d ``` -### **Alternative: Local Build (20-30 minutes)** +### **Security: Build Your Own Images** + +For maximum security and trust, build your own images: + +**Build Locally (No Push):** +```bash +./build_push_docker.sh --local +docker compose -f compose.aws.yaml up --build # or compose.gcp.yaml +``` + +**Build and Push to Your Registry:** +```bash +# To your private AWS ECR +./build_push_docker.sh --aws + +# To your private GCP GAR +./build_push_docker.sh --gcp + +# To your GHCR (requires GITHUB_TOKEN) +./build_push_docker.sh --ghcr +``` + +### **Alternative: Local Build from Source (20-30 minutes)** If you need to build from source or modify the code: diff --git a/build_push_docker.sh b/build_push_docker.sh index 9147840..cf8c433 100755 --- a/build_push_docker.sh +++ b/build_push_docker.sh @@ -1,10 +1,21 @@ #!/bin/bash set -euo pipefail -source .env + +# Source .env if it exists (optional for some targets) +if [[ -f .env ]]; then + source .env +fi usage() { - echo "usage: <--aws|--gcp>" + echo "usage: <--aws|--gcp|--ghcr|--dockerhub|--local>" + echo "" + echo "Registry Options:" + echo " --aws Push to AWS ECR (requires AWS_ACCOUNT_ID, AWS_REGION in .env)" + echo " --gcp Push to GCP GAR (requires CLOUDSDK_* vars in .env)" + echo " --ghcr Push to GitHub Container Registry (requires GITHUB_TOKEN)" + echo " --dockerhub Push to Docker Hub (requires DOCKER_USERNAME, DOCKER_TOKEN)" + echo " 
--local Build locally only, no push" } if [[ $@ == "" ]]; then @@ -13,37 +24,67 @@ if [[ $@ == "" ]]; then exit 1 fi +REGISTRY_TARGET="" for arg in "$@"; do shift case "$arg" in - '--aws') CLOUD_PROVIDER="AWS" ;; - '--gcp') CLOUD_PROVIDER="GCP" ;; - *) CLOUD_PROVIDER="UNKNOWN" ;; + '--aws') REGISTRY_TARGET="AWS" ;; + '--gcp') REGISTRY_TARGET="GCP" ;; + '--ghcr') REGISTRY_TARGET="GHCR" ;; + '--dockerhub') REGISTRY_TARGET="DOCKERHUB" ;; + '--local') REGISTRY_TARGET="LOCAL" ;; + *) REGISTRY_TARGET="UNKNOWN" ;; esac done -if [[ $CLOUD_PROVIDER == "AWS" ]]; then +# Authentication and setup based on registry target +if [[ $REGISTRY_TARGET == "AWS" ]]; then : "${AWS_ACCOUNT_ID:?Environment variable AWS_ACCOUNT_ID not set}" : "${AWS_REGION:?Environment variable AWS_REGION not set}" + echo "Target: AWS ECR" echo "Account ID: $AWS_ACCOUNT_ID" echo "Region: $AWS_REGION" - echo "Authenticating with ECR." + echo "Authenticating with ECR..." aws ecr get-login-password --region "$AWS_REGION" | \ docker login --username AWS --password-stdin "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com" -elif [[ $CLOUD_PROVIDER == "GCP" ]]; then + +elif [[ $REGISTRY_TARGET == "GCP" ]]; then : "${CLOUDSDK_CORE_PROJECT:?Environment variable CLOUDSDK_CORE_PROJECT not set}" : "${CLOUDSDK_COMPUTE_REGION:?Environment variable CLOUDSDK_COMPUTE_REGION not set}" + echo "Target: GCP GAR" echo "Project ID: $CLOUDSDK_CORE_PROJECT" echo "Region: $CLOUDSDK_COMPUTE_REGION" - echo "Authenticating with GAR." - + echo "Authenticating with GAR..." gcloud auth configure-docker "${CLOUDSDK_COMPUTE_REGION}-docker.pkg.dev" --quiet + +elif [[ $REGISTRY_TARGET == "GHCR" ]]; then + : "${GITHUB_TOKEN:?Environment variable GITHUB_TOKEN not set. Get one at https://github.com/settings/tokens}" + + echo "Target: GitHub Container Registry (GHCR)" + echo "Repository: ghcr.io/fuzzylabs/sre-agent-*" + + echo "Authenticating with GHCR..." 
+ echo "$GITHUB_TOKEN" | docker login ghcr.io --username "$(gh api user --jq .login)" --password-stdin + +elif [[ $REGISTRY_TARGET == "DOCKERHUB" ]]; then + : "${DOCKER_USERNAME:?Environment variable DOCKER_USERNAME not set}" + : "${DOCKER_TOKEN:?Environment variable DOCKER_TOKEN not set}" + + echo "Target: Docker Hub" + echo "Username: $DOCKER_USERNAME" + + echo "Authenticating with Docker Hub..." + echo "$DOCKER_TOKEN" | docker login --username "$DOCKER_USERNAME" --password-stdin + +elif [[ $REGISTRY_TARGET == "LOCAL" ]]; then + echo "Target: Local build only (no push)" + else - echo "Unknown cloud provider" + echo "Unknown registry target: $REGISTRY_TARGET" usage exit 1 fi @@ -53,24 +94,66 @@ build_and_push() { local dockerfile=$2 local context=$3 - echo "Building ${name} MCP Server." - docker build -t mcp/${name} -f ${dockerfile} ${context} --platform linux/amd64 + echo "Building ${name} service..." + docker build -t sre-agent/${name}:latest -f ${dockerfile} ${context} --platform linux/amd64 - if [[ $CLOUD_PROVIDER == "AWS" ]]; then - local image_tag="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/mcp/${name}:dev" - else - local image_tag="${CLOUDSDK_COMPUTE_REGION}-docker.pkg.dev/${CLOUDSDK_CORE_PROJECT}/mcp/${name}:dev" - fi - docker tag mcp/${name}:latest "${image_tag}" + # Determine image tag based on registry target + local image_tag="" + case $REGISTRY_TARGET in + "AWS") + image_tag="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/mcp/${name}:latest" + ;; + "GCP") + image_tag="${CLOUDSDK_COMPUTE_REGION}-docker.pkg.dev/${CLOUDSDK_CORE_PROJECT}/mcp/${name}:dev" + ;; + "GHCR") + image_tag="ghcr.io/fuzzylabs/sre-agent-${name}:latest" + ;; + "DOCKERHUB") + image_tag="fuzzylabs/sre-agent-${name}:latest" + ;; + "LOCAL") + echo "✅ Built ${name} locally (tagged as sre-agent/${name}:latest)" + return 0 + ;; + esac - echo "Pushing ${name} MCP Server to ECR." 
+ # Tag and push to registry + docker tag sre-agent/${name}:latest "${image_tag}" + echo "Pushing ${name} to ${REGISTRY_TARGET}..." docker push "${image_tag}" + echo "✅ Pushed ${image_tag}" } +# Build and push all services +echo "🚀 Starting build and push process for all SRE Agent services..." + build_and_push "github" "sre_agent/servers/github/Dockerfile" "sre_agent/" build_and_push "kubernetes" "sre_agent/servers/mcp-server-kubernetes/Dockerfile" "sre_agent/servers/mcp-server-kubernetes" build_and_push "slack" "sre_agent/servers/slack/Dockerfile" "sre_agent/" -build_and_push "sre-orchestrator" "sre_agent/client/Dockerfile" "." +build_and_push "orchestrator" "sre_agent/client/Dockerfile" "." build_and_push "llm-server" "sre_agent/llm/Dockerfile" "." build_and_push "prompt-server" "sre_agent/servers/prompt_server/Dockerfile" "." -build_and_push "llama-firewall" "sre_agent/servers/llama-firewall/Dockerfile" "." +build_and_push "llama-firewall" "sre_agent/firewall/Dockerfile" "." + +echo "" +echo "🎉 All services built and pushed successfully!" 
+if [[ $REGISTRY_TARGET != "LOCAL" ]]; then + echo "📋 Images available at:" + case $REGISTRY_TARGET in + "AWS") + echo " ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/mcp/*:latest" + ;; + "GCP") + echo " ${CLOUDSDK_COMPUTE_REGION}-docker.pkg.dev/${CLOUDSDK_CORE_PROJECT}/mcp/*:dev" + ;; + "GHCR") + echo " ghcr.io/fuzzylabs/sre-agent-*:latest" + ;; + "DOCKERHUB") + echo " fuzzylabs/sre-agent-*:latest" + ;; + esac +else + echo "📋 Local images tagged as: sre-agent/*:latest" +fi diff --git a/compose.ghcr.yaml b/compose.ghcr.yaml new file mode 100644 index 0000000..31c8850 --- /dev/null +++ b/compose.ghcr.yaml @@ -0,0 +1,117 @@ +# GHCR Public Registry Compose Configuration +# Purpose: Fast testing and evaluation using pre-built public images +# Deployment time: ~2-5 minutes vs 10-15 minutes for local builds +# Authentication: No registry authentication required +# Full functionality: All MCP servers included + +services: + slack: + image: ghcr.io/fuzzylabs/sre-agent-slack:latest + environment: + - SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} + - SLACK_TEAM_ID=${SLACK_TEAM_ID} + - TRANSPORT=SSE + healthcheck: + test: ["CMD", "nc", "-z", "localhost", "3001"] + interval: 5s + timeout: 3s + retries: 5 + + kubernetes: + image: ghcr.io/fuzzylabs/sre-agent-kubernetes:latest + volumes: + - ~/.aws:/home/appuser/.aws + - ~/.config/gcloud:/home/appuser/.config/gcloud + environment: + - TRANSPORT=SSE + - AWS_REGION=${AWS_REGION} + - TARGET_EKS_CLUSTER_NAME=${TARGET_EKS_CLUSTER_NAME} + - CLOUDSDK_CORE_PROJECT=${CLOUDSDK_CORE_PROJECT} + - CLOUDSDK_COMPUTE_REGION=${CLOUDSDK_COMPUTE_REGION} + - TARGET_GKE_CLUSTER_NAME=${TARGET_GKE_CLUSTER_NAME} + - GOOGLE_APPLICATION_CREDENTIALS=/home/appuser/.config/gcloud/application_default_credentials.json + healthcheck: + test: ["CMD", "nc", "-z", "localhost", "3001"] + interval: 5s + timeout: 3s + retries: 5 + + github: + image: ghcr.io/fuzzylabs/sre-agent-github:latest + environment: + - 
GITHUB_PERSONAL_ACCESS_TOKEN=${GITHUB_PERSONAL_ACCESS_TOKEN} + - TRANSPORT=SSE + healthcheck: + test: ["CMD", "nc", "-z", "localhost", "3001"] + interval: 5s + timeout: 3s + retries: 5 + + prompt-server: + image: ghcr.io/fuzzylabs/sre-agent-prompt-server:latest + environment: + - GITHUB_ORGANISATION=${GITHUB_ORGANISATION} + - GITHUB_REPO_NAME=${GITHUB_REPO_NAME} + - PROJECT_ROOT=${PROJECT_ROOT} + healthcheck: + test: ["CMD", "nc", "-z", "localhost", "3001"] + interval: 5s + timeout: 3s + retries: 5 + + llm-server: + image: ghcr.io/fuzzylabs/sre-agent-llm-server:latest + environment: + - PROVIDER=${PROVIDER} + - MODEL=${MODEL} + - MAX_TOKENS=${MAX_TOKENS} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + - GEMINI_API_KEY=${GEMINI_API_KEY} + healthcheck: + test: ["CMD", "nc", "-z", "localhost", "8000"] + interval: 5s + timeout: 3s + retries: 5 + + llama-firewall: + image: ghcr.io/fuzzylabs/sre-agent-llama-firewall:latest + volumes: + - source: ~/.cache/huggingface + target: /root/.cache/huggingface + type: bind + bind: + create_host_path: true + environment: + - HF_TOKEN=${HF_TOKEN} + ports: + - "8000:8000" + healthcheck: + test: ["CMD", "nc", "-z", "localhost", "8000"] + interval: 5s + timeout: 3s + retries: 5 + + orchestrator: + image: ghcr.io/fuzzylabs/sre-agent-orchestrator:latest + ports: + - "8003:80" + depends_on: + slack: + condition: service_healthy + github: + condition: service_healthy + kubernetes: + condition: service_healthy + prompt-server: + condition: service_healthy + llm-server: + condition: service_healthy + llama-firewall: + condition: service_healthy + environment: + - DEV_BEARER_TOKEN=${DEV_BEARER_TOKEN} + - QUERY_TIMEOUT=${QUERY_TIMEOUT} + - SLACK_SIGNING_SECRET=${SLACK_SIGNING_SECRET} + - TOOLS=${TOOLS} + - SLACK_CHANNEL_ID=${SLACK_CHANNEL_ID} + - SERVICES=${SERVICES} diff --git a/compose.tests.yaml b/compose.tests.yaml index 1f56177..f77f578 100644 --- a/compose.tests.yaml +++ b/compose.tests.yaml @@ -1,3 +1,9 @@ +# Testing Compose Configuration 
+# Purpose: Local development and CI testing with mock LLM provider +# Requirements: Only HF_TOKEN needed (no cloud credentials) +# Build time: ~10-15 minutes first time, faster on subsequent runs +# Missing services: Slack, GitHub, Kubernetes MCP servers (not needed for basic testing) + services: prompt-server: build: diff --git a/docs/credentials.md b/docs/credentials.md index ed576ea..7c684e8 100644 --- a/docs/credentials.md +++ b/docs/credentials.md @@ -71,15 +71,27 @@ gcloud container clusters get-credentials YOUR_CLUSTER_NAME --region YOUR_REGION ## 🚀 Quick Setup -Use our interactive setup script to configure all credentials: +Use our interactive setup script to configure credentials with different modes: ```bash -python setup_credentials.py +# Testing mode - minimal setup, mock LLM (fastest!) +uv run python setup_credentials.py --mode testing + +# Minimal mode - essential credentials only +uv run python setup_credentials.py --mode minimal --platform aws + +# Full mode - all features enabled +uv run python setup_credentials.py --mode full --platform aws ``` +**Setup Modes:** +- 🧪 **Testing**: Mock LLM, only requires HF_TOKEN - perfect for trying it out +- ⚡ **Minimal**: Essential credentials only - basic LLM functionality +- 🚀 **Full**: Complete setup - Slack, GitHub, Kubernetes integrations + The script will: -- Auto-detect your platform (AWS/GCP) or let you choose -- Guide you through each credential with helpful prompts +- Guide you through only the credentials you need for your chosen mode +- Provide sensible defaults where possible - Show current values (masked for security) and let you update them - Create your `.env` file automatically @@ -87,7 +99,7 @@ The script will: ```bash # For AWS -python setup_credenitals.py --platform aws +uv run python setup_credentials.py --platform aws # For GCP -python setup_credenitals.py --platform gcp +uv run python setup_credentials.py --platform gcp diff --git a/setup_credentials.py b/setup_credentials.py 
index 879068b..f602e1f 100644 --- a/setup_credentials.py +++ b/setup_credentials.py @@ -41,117 +41,218 @@ def read_env_file(filename: str = ".env") -> dict[str, str]: return env_vars -def get_credential_config(platform: str) -> dict[str, dict[str, Any]]: - """Get the credential configuration for the specified platform. +def get_credential_config( + platform: str, mode: str = "full" +) -> dict[str, dict[str, Any]]: + """Get the credential configuration for the specified platform and mode. Config structure: - mask_value: When displaying existing values, show masked (True) vs full value (False) + - tier: essential, feature_specific, or optional + - default: default value if any """ - # Common credentials for both platforms - common_creds = { + # Essential credentials - required for basic functionality + essential_creds = { + "DEV_BEARER_TOKEN": { + "prompt": "Enter a bearer token (password) for developers to " + "directly invoke the agent via the `/diagnose` endpoint. " + "(This can be anything): ", + "mask_value": True, + "tier": "essential", + "default": "dev-token-123", + }, + "HF_TOKEN": { + "prompt": "Enter your Hugging Face API token (for Llama Firewall). 
" + "Get one at https://huggingface.co/settings/tokens with read access " + "to meta-llama/Llama-Prompt-Guard-2-86M: ", + "mask_value": True, + "tier": "essential", + }, + "PROVIDER": { + "prompt": "Enter your LLM provider name (anthropic/gemini/mock): ", + "mask_value": False, + "tier": "essential", + "default": "mock", + }, + "MODEL": { + "prompt": "Enter your LLM model name: ", + "mask_value": False, + "tier": "essential", + "default": "claude-3-5-sonnet-20241022", + }, + } + + # LLM provider credentials - at least one required + llm_creds = { + "ANTHROPIC_API_KEY": { + "prompt": "Enter your Anthropic API Key " + "(required if using anthropic provider): ", + "mask_value": True, + "tier": "essential_conditional", + }, + "GEMINI_API_KEY": { + "prompt": "Enter your Gemini API Key " + "(required if using gemini provider): ", + "mask_value": True, + "tier": "essential_conditional", + }, + } + + # Feature-specific credentials + feature_creds = { "SLACK_BOT_TOKEN": { - "prompt": "Enter your Slack Bot Token. If you haven't set up a Slack " - "app yet, check out this article https://api.slack.com/apps " - "to create one: ", + "prompt": "Enter your Slack Bot Token (for notifications). 
" + "If you haven't set up a Slack app yet, check out this article " + "https://api.slack.com/apps to create one: ", "mask_value": True, + "tier": "feature_specific", }, + "SLACK_TEAM_ID": { + "prompt": "Enter your Slack Team ID (for notifications): ", + "mask_value": False, + "tier": "feature_specific", + }, + "GITHUB_PERSONAL_ACCESS_TOKEN": { + "prompt": "Enter your Github Personal Access Token " + "(for repository access): ", + "mask_value": True, + "tier": "feature_specific", + }, + } + + # Optional credentials with defaults + optional_creds = { "SLACK_SIGNING_SECRET": { "prompt": "Enter the signing secret associated with the Slack " "`sre-agent` application: ", "mask_value": True, + "tier": "optional", + "default": "null", }, - "SLACK_TEAM_ID": {"prompt": "Enter your Slack Team ID: ", "mask_value": False}, "SLACK_CHANNEL_ID": { "prompt": "Enter your Slack Channel ID: ", "mask_value": False, - }, - "GITHUB_PERSONAL_ACCESS_TOKEN": { - "prompt": "Enter your Github Personal Access Token: ", - "mask_value": True, + "tier": "optional", + "default": "null", }, "GITHUB_ORGANISATION": { "prompt": "Enter your Github organisation name: ", "mask_value": False, + "tier": "optional", + "default": "fuzzylabs", }, "GITHUB_REPO_NAME": { "prompt": "Enter your Github repository name: ", "mask_value": False, + "tier": "optional", + "default": "microservices-demo", }, "PROJECT_ROOT": { "prompt": "Enter your Github project root directory: ", "mask_value": False, - }, - "PROVIDER": {"prompt": "Enter your LLM provider name: ", "mask_value": False}, - "MODEL": {"prompt": "Enter your LLM model name: ", "mask_value": False}, - "GEMINI_API_KEY": {"prompt": "Enter your Gemini API Key: ", "mask_value": True}, - "ANTHROPIC_API_KEY": { - "prompt": "Enter your Anthropic API Key: ", - "mask_value": True, + "tier": "optional", + "default": "src", }, "MAX_TOKENS": { "prompt": "Controls the maximum number of tokens the LLM can generate in " "its response e.g. 
10000: ", "mask_value": False, + "tier": "optional", + "default": "10000", }, - "DEV_BEARER_TOKEN": { - "prompt": "Enter a bearer token (password) for developers to " - "directly invoke the agent via the `/diagnose` endpoint. " - "(This can be anything): ", - "mask_value": True, - }, - "HF_TOKEN": { - "prompt": "Enter your Hugging Face API token, ensure this has read " - "access to https://huggingface.co/meta-llama/" - "Llama-Prompt-Guard-2-86M, read the following article " - "(https://huggingface.co/docs/hub/en/security-tokens) " - "to set up this token: ", - "mask_value": True, + "QUERY_TIMEOUT": { + "prompt": "Enter your query timeout in seconds (e.g. 300): ", + "mask_value": False, + "tier": "optional", + "default": "300", }, } - if platform == "aws": + # Combine credentials based on mode + if mode == "quick": + # Quick mode: minimal credentials for public image deployment + quick_creds = { + **essential_creds, + **llm_creds, + "GITHUB_ORGANISATION": optional_creds["GITHUB_ORGANISATION"], + "GITHUB_REPO_NAME": optional_creds["GITHUB_REPO_NAME"], + "PROJECT_ROOT": optional_creds["PROJECT_ROOT"], + } + # Set reasonable defaults for quick deployment + quick_creds["DEV_BEARER_TOKEN"]["default"] = "dev-token-123" + common_creds = quick_creds + elif mode == "minimal": + common_creds = {**essential_creds, **llm_creds} + elif mode == "testing": + # For testing, use mock provider and minimal setup + testing_creds = { + **essential_creds, + "GITHUB_ORGANISATION": optional_creds["GITHUB_ORGANISATION"], + "GITHUB_REPO_NAME": optional_creds["GITHUB_REPO_NAME"], + "PROJECT_ROOT": optional_creds["PROJECT_ROOT"], + } + # Override defaults for testing + testing_creds["PROVIDER"]["default"] = "mock" + testing_creds["MODEL"]["default"] = "mock-model" + common_creds = testing_creds + else: # full mode + common_creds = { + **essential_creds, + **llm_creds, + **feature_creds, + **optional_creds, + } + + # Platform-specific credentials (only added in full mode unless minimal AWS/GCP 
+ # testing) + if platform == "aws" and mode != "testing": aws_specific = { - "AWS_REGION": {"prompt": "Enter your AWS region: ", "mask_value": False}, + "AWS_REGION": { + "prompt": "Enter your AWS region: ", + "mask_value": False, + "tier": "feature_specific", + "default": "us-east-1", + }, "AWS_ACCOUNT_ID": { "prompt": "Enter your AWS account ID: ", "mask_value": False, + "tier": "feature_specific", }, "TARGET_EKS_CLUSTER_NAME": { "prompt": "Enter your target EKS cluster name (the cluster the " "agent will interact with): ", "mask_value": False, + "tier": "feature_specific", }, } - return {**common_creds, **aws_specific} + if mode == "full": + common_creds.update(aws_specific) - elif platform == "gcp": + elif platform == "gcp" and mode != "testing": gcp_specific = { - "QUERY_TIMEOUT": { - "prompt": "Enter your query timeout (e.g. 300): ", - "mask_value": False, - }, "CLOUDSDK_CORE_PROJECT": { "prompt": "Enter your GCP project ID: ", "mask_value": False, + "tier": "feature_specific", }, "CLOUDSDK_COMPUTE_REGION": { "prompt": "Enter your GCP region: ", "mask_value": False, + "tier": "feature_specific", + "default": "us-central1", }, "TARGET_GKE_CLUSTER_NAME": { "prompt": "Enter your target GKE cluster name (the cluster the " "agent will interact with): ", "mask_value": False, + "tier": "feature_specific", }, } - return {**common_creds, **gcp_specific} + if mode == "full": + common_creds.update(gcp_specific) - else: - raise ValueError( - f"Unsupported platform: {platform}. Supported " - "platforms are 'aws' and 'gcp'." 
- ) + return common_creds def display_current_credentials( @@ -175,15 +276,21 @@ def display_current_credentials( def get_credential_input( - prompt: str, current_value: Optional[str] = None, mask_value: bool = True + prompt: str, + current_value: Optional[str] = None, + mask_value: bool = True, + default_value: Optional[str] = None, ) -> str: """Get credential input from user, showing current value if it exists.""" - if current_value: + display_value = current_value or default_value + + if display_value: # Show the current value (masked or unmasked based on mask_value) - displayed_current = mask_credential(current_value, mask_value) + displayed_current = mask_credential(display_value, mask_value) + value_type = "Current value" if current_value else "Default" display_prompt = ( - f"{prompt}\nCurrent value: {displayed_current}\n" - "Press Enter to keep current value, or enter new value: " + f"{prompt}\n{value_type}: {displayed_current}\n" + f"Press Enter to keep {value_type.lower()}, or enter new value: " ) else: display_prompt = prompt @@ -191,9 +298,9 @@ def get_credential_input( # Use regular input for all inputs new_value = input(display_prompt) - # If user pressed Enter and there's a current value, keep it - if not new_value and current_value: - return current_value + # If user pressed Enter and there's a current/default value, keep it + if not new_value and display_value: + return current_value or default_value or "" return new_value @@ -218,31 +325,104 @@ def handle_comma_separated_input( return str(user_input.split(",")) if user_input else str([]) -def get_platform_credentials( - platform: str, existing_creds: dict[str, str] +def get_platform_credentials( # noqa: C901 + platform: str, existing_creds: dict[str, str], mode: str = "full" ) -> dict[str, str]: - """Get credentials for the specified platform.""" - print(f"Setting up {platform.upper()} credentials...") + """Get credentials for the specified platform and mode.""" + print(f"Setting up 
{platform.upper()} credentials in {mode} mode...") credentials = {} - creds_config = get_credential_config(platform) + creds_config = get_credential_config(platform, mode) - # Process standard credentials - for key, config in creds_config.items(): - credentials[key] = get_credential_input( - config["prompt"], existing_creds.get(key), config["mask_value"] - ) + # Group credentials by tier for better UX + essential_creds = { + k: v for k, v in creds_config.items() if v.get("tier") == "essential" + } + conditional_creds = { + k: v + for k, v in creds_config.items() + if v.get("tier") == "essential_conditional" + } + feature_creds = { + k: v for k, v in creds_config.items() if v.get("tier") == "feature_specific" + } + optional_creds = { + k: v for k, v in creds_config.items() if v.get("tier") == "optional" + } - # Handle special cases for comma-separated values - credentials["SERVICES"] = handle_comma_separated_input( - "SERVICES", - "Enter the services running on the cluster (comma-separated)", - existing_creds, - ) + # Process essential credentials first + if essential_creds: + print("\n๐Ÿ“‹ Essential credentials for basic functionality:") + for key, config in essential_creds.items(): + credentials[key] = get_credential_input( + config["prompt"], + existing_creds.get(key), + config["mask_value"], + config.get("default"), + ) - credentials["TOOLS"] = handle_comma_separated_input( - "TOOLS", "Enter the tools you want to utilise (comma-separated)", existing_creds - ) + # Handle LLM provider credentials with validation + if conditional_creds and mode in ["full", "minimal"]: + print("\n๐Ÿ”‘ LLM Provider credentials (at least one required):") + provider = credentials.get("PROVIDER", "").lower() + + for key, config in conditional_creds.items(): + if ( + (provider == "anthropic" and "ANTHROPIC" in key) + or (provider == "gemini" and "GEMINI" in key) + or provider in ["mock", ""] + ): + credentials[key] = get_credential_input( + config["prompt"], + existing_creds.get(key), + 
config["mask_value"], + config.get("default"), + ) + + # Process feature-specific credentials + if feature_creds and mode == "full": + print("\n๐Ÿ”ง Feature-specific credentials (optional - skip if not needed):") + for key, config in feature_creds.items(): + credentials[key] = get_credential_input( + config["prompt"], + existing_creds.get(key), + config["mask_value"], + config.get("default"), + ) + + # Process optional credentials + if optional_creds and mode == "full": + print("\nโš™๏ธ Additional configuration (optional - defaults will be used):") + for key, config in optional_creds.items(): + credentials[key] = get_credential_input( + config["prompt"], + existing_creds.get(key), + config["mask_value"], + config.get("default"), + ) + + # Handle special cases for comma-separated values (only in full mode) + if mode == "full": + credentials["SERVICES"] = handle_comma_separated_input( + "SERVICES", + "Enter the services running on the cluster (comma-separated)", + existing_creds, + ) + + credentials["TOOLS"] = handle_comma_separated_input( + "TOOLS", + "Enter the tools you want to utilise (comma-separated)", + existing_creds, + ) + else: + # Use defaults for testing/minimal modes + credentials["SERVICES"] = existing_creds.get( + "SERVICES", '["cartservice", "adservice", "emailservice"]' + ) + credentials["TOOLS"] = existing_creds.get( + "TOOLS", + '["list_pods", "get_logs", "get_file_contents", "slack_post_message"]', + ) return credentials @@ -277,7 +457,7 @@ def create_env_file(credentials: dict[str, str], filename: str = ".env") -> None print(f"{filename} file created successfully.") -def main() -> None: +def main() -> None: # noqa: C901, PLR0912, PLR0915 """Main function to set up credentials.""" parser = argparse.ArgumentParser(description="SRE Agent Credential Setup") parser.add_argument( @@ -285,18 +465,65 @@ def main() -> None: choices=["aws", "gcp"], help="Specify platform (aws/gcp) to skip platform selection", ) + parser.add_argument( + "--mode", + 
choices=["quick", "minimal", "testing", "full"], + help="Setup mode: quick (GHCR public images), minimal (essential only), " + "testing (mock setup), full (all features)", + default="full", + ) args = parser.parse_args() print("=== SRE Agent Credential Setup ===") print("This script will help you set up credentials for running the agent locally.") + # Explain modes if not specified + if not args.mode or args.mode == "full": + print("\n๐ŸŽฏ Setup Modes:") + print( + " โ€ข quick - Use public images, minimal setup (FASTEST - 2-5 minutes!)" + ) + print(" โ€ข minimal - Essential credentials only (for basic testing)") + print(" โ€ข testing - Mock setup (no real API keys needed)") + print(" โ€ข full - Complete setup (all features)") + + mode_choice = ( + input("\nChoose setup mode (quick/minimal/testing/full) [quick]: ") + .strip() + .lower() + ) + mode = ( + mode_choice + if mode_choice in ["quick", "minimal", "testing", "full"] + else "quick" + ) + else: + mode = args.mode + + print(f"\n๐Ÿ”ง Setup mode: {mode.upper()}") + + if mode == "quick": + print(" ๐Ÿš€ Using public images - fastest deployment!") + print(" ๐Ÿ’ก Deploy with: docker compose -f compose.ghcr.yaml up") + elif mode == "testing": + print(" Using mock provider - no real API keys required!") + elif mode == "minimal": + print(" Only essential credentials - basic functionality only") + else: + print(" Complete setup - all features will be available") + # Read existing credentials existing_creds = read_env_file() - # Ask for platform choice first to get the right config + # Platform selection logic platform = args.platform - if not platform: + if mode in ["testing", "quick"]: + # For testing/quick modes, default to aws if not specified + platform = platform or "aws" + if mode == "quick": + print(f"\n๐ŸŒ Platform: {platform.upper()} (can be changed later)") + elif not platform: detected_platform = detect_platform_from_env(existing_creds) if detected_platform: use_detected = ( @@ -326,22 +553,41 @@ def main() 
-> None: # Show existing credentials if any if existing_creds: - creds_config = get_credential_config(platform) + creds_config = get_credential_config(platform, mode) display_current_credentials(existing_creds, creds_config) - # Get credentials based on platform - credentials = get_platform_credentials(platform, existing_creds) + # Get credentials based on platform and mode + credentials = get_platform_credentials(platform, existing_creds, mode) # Create .env file create_env_file(credentials) print("\nโœ… Credentials saved to .env file!") print("\n๐Ÿš€ Next steps:") - print(" Start the containers manually with:") - if platform == "aws": - print(" docker compose -f compose.aws.yaml up") - elif platform == "gcp": - print(" docker compose -f compose.gcp.yaml up") + + if mode == "quick": + print(" Start with public images (FASTEST - 2-5 minutes):") + print(" docker compose -f compose.ghcr.yaml up") + print("\n ๐Ÿ’ก For production or security, build your own images:") + print(" ./build_push_docker.sh --local") + if platform == "aws": + print(" docker compose -f compose.aws.yaml up --build") + elif platform == "gcp": + print(" docker compose -f compose.gcp.yaml up --build") + elif mode == "testing": + print(" Start the test containers with:") + print(" docker compose -f compose.tests.yaml up") + else: + print(" Start the containers with:") + if platform == "aws": + print(" docker compose -f compose.aws.yaml up") + elif platform == "gcp": + print(" docker compose -f compose.gcp.yaml up") + + print( + f"\n๐Ÿ’ก Tip: You can run 'uv run python setup_credentials.py --mode {mode}' " + f"again to use the same mode" + ) if __name__ == "__main__": diff --git a/sre_agent/client/utils/schemas.py b/sre_agent/client/utils/schemas.py index b4b4e48..13b2512 100644 --- a/sre_agent/client/utils/schemas.py +++ b/sre_agent/client/utils/schemas.py @@ -25,7 +25,28 @@ def _validate_fields(self: DataclassInstance) -> None: attr = getattr(self, config.name) if not attr: - msg = f"Environment 
variable {config.name.upper()} is not set." + env_var = config.name.upper() + + # Provide helpful context for missing credentials + if env_var == "DEV_BEARER_TOKEN": + msg = ( + f"Environment variable {env_var} is not set. " + "This is required for API authentication. " + "Run 'uv run python setup_credentials.py --mode minimal' " + "to configure." + ) + elif env_var == "SLACK_CHANNEL_ID": + msg = ( + f"Environment variable {env_var} is not set. " + "This is required for Slack notifications. " + "Either configure Slack integration or use minimal mode." + ) + else: + msg = ( + f"Environment variable {env_var} is not set. " + "Run 'uv run python setup_credentials.py' to configure credentials." + ) + logger.error(msg) raise ValueError(msg) diff --git a/sre_agent/llm/utils/clients.py b/sre_agent/llm/utils/clients.py index 05d3e9a..bdb4f4b 100644 --- a/sre_agent/llm/utils/clients.py +++ b/sre_agent/llm/utils/clients.py @@ -72,7 +72,14 @@ class AnthropicClient(BaseClient): def __init__(self, settings: LLMSettings = LLMSettings()) -> None: """The constructor for the Anthropic client.""" super().__init__(settings) - self.client = Anthropic() + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + raise ValueError( + "ANTHROPIC_API_KEY environment variable is not set. " + "This is required when using the Anthropic provider. " + "Run 'uv run python setup_credentials.py --mode minimal' to configure." + ) + self.client = Anthropic(api_key=api_key) @staticmethod def _add_cache_to_final_block( @@ -176,7 +183,14 @@ class GeminiClient(BaseClient): def __init__(self, settings: LLMSettings = LLMSettings()) -> None: """The constructor for the Gemini client.""" super().__init__(settings) - self.client = genai.Client(api_key=os.getenv("GEMINI_API_KEY")) + api_key = os.getenv("GEMINI_API_KEY") + if not api_key: + raise ValueError( + "GEMINI_API_KEY environment variable is not set. " + "This is required when using the Gemini provider. 
" + "Run 'uv run python setup_credentials.py --mode minimal' to configure." + ) + self.client = genai.Client(api_key=api_key) def generate(self, payload: TextGenerationPayload) -> Message: """A method for generating text using the Gemini API."""