diff --git a/.github/workflows/setup-github-projects_Version3.yml b/.github/workflows/setup-github-projects_Version3.yml new file mode 100644 index 0000000..a99d2f3 --- /dev/null +++ b/.github/workflows/setup-github-projects_Version3.yml @@ -0,0 +1,130 @@ +name: Setup GitHub Project Automations + +on: + push: + branches: + - main + schedule: + - cron: "0 0 * * 0" # Every Sunday at midnight UTC + issues: + types: [opened, reopened] + pull_request: + types: [opened, reopened] + workflow_dispatch: + +jobs: + setup-project: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up GitHub CLI + run: | + sudo apt-get update + sudo apt-get install -y gh + # Verify gh CLI version is compatible (>=2.21.0) for native 'gh project' commands + version=$(gh --version | head -n1 | awk '{print $3}' 2>/dev/null || echo "0.0.0") + major=$(echo "$version" | cut -d. -f1) + minor=$(echo "$version" | cut -d. -f2) + if [ "$major" -lt 2 ] || { [ "$major" -eq 2 ] && [ "$minor" -lt 21 ]; }; then + echo "gh CLI >= 2.21.0 required. Current: $version" >&2 + exit 1 + fi + + - name: Authenticate GH CLI + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: gh auth login --with-token <<< "${GH_TOKEN}" + + - name: Create Project with description + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + PROJECT_NAME="Auto Project" + PROJECT_DESC="Automated project for task management with weekly iterations" + + # Check if project exists + proj_list=$(gh project list --owner ${{ github.repository_owner }} --format json 2>&1) + rc=$? + if [ $rc -ne 0 ]; then + echo "gh project list failed: $proj_list" >&2 + exit $rc + fi + PROJECT_ID=$(echo "$proj_list" | jq -r "[.[] | select(.title == \"$PROJECT_NAME\") | .number] | first // empty") + + if [ -z "$PROJECT_ID" ]; then + echo "Creating new project..." + gh project create "$PROJECT_NAME" --owner ${{ github.repository_owner }} --body "$PROJECT_DESC" + + # Re-check to ensure we obtained an ID, fail explicitly if not found + proj_list=$(gh project list --owner ${{ github.repository_owner }} --format json 2>&1) + rc=$? + if [ $rc -ne 0 ]; then + echo "gh project list failed after create: $proj_list" >&2 + exit $rc + fi + PROJECT_ID=$(echo "$proj_list" | jq -r "[.[] | select(.title == \"$PROJECT_NAME\") | .number] | first // empty") + if [ -z "$PROJECT_ID" ]; then + echo "Failed to obtain PROJECT_ID for $PROJECT_NAME" >&2 + exit 1 + fi + fi + + echo "PROJECT_ID=$PROJECT_ID" >> $GITHUB_ENV + + - name: Reminder - Create views manually + run: | + echo "NOTE: GitHub Projects v2 views cannot be created via the gh CLI." + echo "Please create the following views manually in the GitHub web UI:" + echo " 1. 'Tasks Table' (Table view)" + echo " 2. 'Kanban Board' (Board view)" + echo " 3. 'Schedule Timeline' (Timeline view)" + + - name: Backfill existing open issues + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + PROJECT_ID=${{ env.PROJECT_ID }} + REPO="${{ github.repository }}" + + # Fetch all open issues via paginated API + failures=0 + issues_json=$(gh api --paginate "repos/$REPO/issues?state=open&per_page=100" --jq '.[].number' 2>&1) + rc=$? + if [ $rc -ne 0 ]; then + echo "Error listing issues for $REPO: $issues_json" >&2 + exit $rc + fi + + while read -r issue_num; do + [ -z "$issue_num" ] && continue + ISSUE_URL="https://github.com/$REPO/issues/$issue_num" + err=$(gh project item-add "$PROJECT_ID" --url "$ISSUE_URL" 2>&1) || { + echo "Error adding issue $issue_num to project $PROJECT_ID: $err" >&2 + failures=$((failures+1)) + } + done <<< "$issues_json" + + if [ "$failures" -gt 0 ]; then + echo "Summary: $failures issue(s) failed to add to project $PROJECT_ID" >&2 + fi + + - name: Add new issues/PRs to project + if: github.event_name == 'issues' || github.event_name == 'pull_request' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + PROJECT_ID=${{ env.PROJECT_ID }} + REPO="${{ github.repository }}" + + if [ "${{ github.event_name }}" = "issues" ]; then + ISSUE_URL="https://github.com/${REPO}/issues/${{ github.event.issue.number }}" + out=$(gh project item-add "$PROJECT_ID" --url "$ISSUE_URL" 2>&1) || { echo "Error adding issue $ISSUE_URL to project: $out" >&2; exit 1; } + elif [ "${{ github.event_name }}" = "pull_request" ]; then + URL="https://github.com/${REPO}/pull/${{ github.event.pull_request.number }}" + out=$(gh project item-add "$PROJECT_ID" --url "$URL" 2>&1) || { echo "Error adding PR $URL to project: $out" >&2; exit 1; } + else + echo "Unsupported event: ${{ github.event_name }}" >&2 + exit 1 + fi diff --git a/.gitignore b/.gitignore index bd38169..d3933e1 100644 --- a/.gitignore +++ b/.gitignore @@ -116,3 +116,21 @@ current_logs.txt agent_builder_full_architecture_with_api_gateway agent_builder_complete_infrastructure +.amazonq/agents/default.json +evaluators/diagram_test.py +evaluators/dynamic_mcp.py + +evaluators/dynamic_model_selection.py +evaluators/evaluator_workflow.py +evaluators/llm-judge.py +evaluators/structured_eval.py +evaluators/tool_context.py +evaluators/tool_eval.py +langchain_chain.py +mydiagram.py +diagrams/Web Application Architecture..png +evaluators/db_tool.py +ARCHITECTURE_DIAGRAM.txt +OLLAMA_QUICK_SUMMARY.txt +mcp.json +tmp_write_check.txt diff --git a/README.md b/README.md index 904a997..11de17e 100644 --- a/README.md +++ b/README.md @@ -520,3 +520,6 @@ Proprietary - All rights reserved - Agent deployments use AWS (AgentCore or Fargate) - MCP integration enables agent-to-agent communication - Meta-tooling allows agents to create their own tools + +## Security +[![Codacy Badge](https://app.codacy.com/project/badge/Grade/13bcb475933b44fca1e7f27dcdbb9078)](https://app.codacy.com?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) \ No newline at end of file diff --git a/Security_checkup.py b/Security_checkup.py new file mode 100644 index 0000000..621ecdf --- /dev/null +++ b/Security_checkup.py @@ -0,0 +1,3 @@ +from strands import Agent, tool +from strands_tools import LocalChromiumBrowser +from strands_tools import think, readfile \ No newline at end of file diff --git a/assistant/main.py b/assistant/main.py index 45c7b52..2a2c2f1 100644 --- a/assistant/main.py +++ b/assistant/main.py @@ -35,7 +35,7 @@ # AWS Bedrock configuration AWS_REGION = os.getenv("AWS_REGION", "us-east-1") -CLAUDE_HAIKU_MODEL = "us.anthropic.claude-haiku-4-5-20250514-v1:0" +CLAUDE_HAIKU_MODEL = "anthropic.claude-haiku-4-5-20251001-v1:0" bedrock_config = Config( region_name=AWS_REGION, @@ -99,13 +99,13 @@ async def chat_with_assistant(request: AssistantRequest): try: # Build messages for Claude messages = [{"role": msg.role, "content": msg.content} for msg in request.messages] - + # Add context if provided if request.context: context_str = f"\n\nCurrent Context:\n{json.dumps(request.context, indent=2)}" if messages: messages[-1]["content"] += context_str - + # Prepare request body body = { "anthropic_version": "bedrock-2023-05-31", @@ -114,28 +114,28 @@ async def chat_with_assistant(request: AssistantRequest): "system": SYSTEM_PROMPT, "messages": messages } - + # Invoke Bedrock model (serverless - pay per token) logger.info(f"Invoking {CLAUDE_HAIKU_MODEL}") response = bedrock_runtime.invoke_model( modelId=CLAUDE_HAIKU_MODEL, body=json.dumps(body) ) - + # Parse response response_body = json.loads(response['body'].read()) assistant_message = response_body['content'][0]['text'] - + # Extract suggestions and code snippets if present suggestions = extract_suggestions(assistant_message) code_snippet = extract_code_snippet(assistant_message) - + return AssistantResponse( message=assistant_message, suggestions=suggestions, code_snippet=code_snippet ) - + except Exception as e: logger.error(f"Error invoking assistant: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) diff --git a/convex/_generated/api.d.ts b/convex/_generated/api.d.ts index d9ed32e..31bd99c 100644 --- a/convex/_generated/api.d.ts +++ b/convex/_generated/api.d.ts @@ -8,10 +8,13 @@ * @module */ +import type * as agentAsToolGenerator from "../agentAsToolGenerator.js"; import type * as agentBuilderPrompts from "../agentBuilderPrompts.js"; import type * as agentBuilderWorkflow from "../agentBuilderWorkflow.js"; import type * as agentCapabilities from "../agentCapabilities.js"; import type * as agentCoreTester from "../agentCoreTester.js"; +import type * as agentImprovement from "../agentImprovement.js"; +import type * as agentTemplates from "../agentTemplates.js"; import type * as agentValidation from "../agentValidation.js"; import type * as agentcoreDeployment from "../agentcoreDeployment.js"; import type * as agentcoreMCP from "../agentcoreMCP.js"; @@ -19,9 +22,11 @@ import type * as agentcoreSetup from "../agentcoreSetup.js"; import type * as agentcoreTestExecution from "../agentcoreTestExecution.js"; import type * as agents from "../agents.js"; import type * as apiKeys from "../apiKeys.js"; +import type * as auditLogs from "../auditLogs.js"; import type * as auth from "../auth.js"; import type * as authDebug from "../authDebug.js"; import type * as authErrorHandler from "../authErrorHandler.js"; +import type * as automatedAgentBuilder from "../automatedAgentBuilder.js"; import type * as awsAuth from "../awsAuth.js"; import type * as awsCrossAccount from "../awsCrossAccount.js"; import type * as awsDeployment from "../awsDeployment.js"; @@ -35,6 +40,7 @@ import type * as codeGenerator from "../codeGenerator.js"; import type * as cognitoAuth from "../cognitoAuth.js"; import type * as constants from "../constants.js"; import type * as containerOrchestrator from "../containerOrchestrator.js"; +import type * as conversationAnalysis from "../conversationAnalysis.js"; import type * as conversations from "../conversations.js"; import type * as crons from "../crons.js"; import type * as debuggingAgent from "../debuggingAgent.js"; @@ -52,9 +58,18 @@ import type * as lambdaTesting from "../lambdaTesting.js"; import type * as lib_aws_cloudwatchClient from "../lib/aws/cloudwatchClient.js"; import type * as lib_aws_ecsClient from "../lib/aws/ecsClient.js"; import type * as lib_aws_s3Client from "../lib/aws/s3Client.js"; +import type * as lib_bedrockGate from "../lib/bedrockGate.js"; import type * as lib_cloudFormationGenerator from "../lib/cloudFormationGenerator.js"; +import type * as lib_dynamicModelSwitching from "../lib/dynamicModelSwitching.js"; import type * as lib_fileGenerators from "../lib/fileGenerators.js"; -import type * as lib_stateValidation from "../lib/stateValidation.js"; +import type * as lib_memoryStore from "../lib/memoryStore.js"; +import type * as lib_messageExecutor from "../lib/messageExecutor.js"; +import type * as lib_roles from "../lib/roles.js"; +import type * as lib_strandsTools from "../lib/strandsTools.js"; +import type * as lib_tierConfig from "../lib/tierConfig.js"; +import type * as lib_tokenBilling from "../lib/tokenBilling.js"; +import type * as lib_unifiedModalitySwitching from "../lib/unifiedModalitySwitching.js"; +import type * as localModelDetector from "../localModelDetector.js"; import type * as maintenance from "../maintenance.js"; import type * as mcpClient from "../mcpClient.js"; import type * as mcpConfig from "../mcpConfig.js"; @@ -66,16 +81,36 @@ import type * as metaAgentWorkflow from "../metaAgentWorkflow.js"; import type * as metaTooling from "../metaTooling.js"; import type * as modelBenchmarks from "../modelBenchmarks.js"; import type * as modelRegistry from "../modelRegistry.js"; +import type * as multiAgentRuntime from "../multiAgentRuntime.js"; +import type * as ollamaInstaller from "../ollamaInstaller.js"; +import type * as ollamaMCPIntegration from "../ollamaMCPIntegration.js"; import type * as packageMutations from "../packageMutations.js"; +import type * as platformValue from "../platformValue.js"; +import type * as promptChainExecutor from "../promptChainExecutor.js"; import type * as queueProcessor from "../queueProcessor.js"; +import type * as rateLimiter from "../rateLimiter.js"; import type * as realAgentTesting from "../realAgentTesting.js"; import type * as router from "../router.js"; import type * as strandsAgentExecution from "../strandsAgentExecution.js"; +import type * as strandsAgentExecutionDynamic from "../strandsAgentExecutionDynamic.js"; +import type * as strandsAgentsTools from "../strandsAgentsTools.js"; +import type * as stripe from "../stripe.js"; +import type * as stripeMutations from "../stripeMutations.js"; +import type * as swarmTestingOrchestrator from "../swarmTestingOrchestrator.js"; import type * as templates from "../templates.js"; import type * as testExecution from "../testExecution.js"; import type * as toolRegistry from "../toolRegistry.js"; +import type * as tools from "../tools.js"; import type * as types_tools from "../types/tools.js"; +import type * as unifiedAgentExecution from "../unifiedAgentExecution.js"; +import type * as unifiedAgentExecutionTest from "../unifiedAgentExecutionTest.js"; +import type * as unifiedUserIdentity from "../unifiedUserIdentity.js"; import type * as userAWSAccounts from "../userAWSAccounts.js"; +import type * as users from "../users.js"; +import type * as workflowExecutor from "../workflowExecutor.js"; +import type * as workflowTemplateSeeder from "../workflowTemplateSeeder.js"; +import type * as workflowTemplates from "../workflowTemplates.js"; +import type * as workflows from "../workflows.js"; import type { ApiFromModules, @@ -83,19 +118,14 @@ import type { FunctionReference, } from "convex/server"; -/** - * A utility for referencing Convex functions in your app's API. - * - * Usage: - * ```js - * const myFunctionReference = api.myModule.myFunction; - * ``` - */ declare const fullApi: ApiFromModules<{ + agentAsToolGenerator: typeof agentAsToolGenerator; agentBuilderPrompts: typeof agentBuilderPrompts; agentBuilderWorkflow: typeof agentBuilderWorkflow; agentCapabilities: typeof agentCapabilities; agentCoreTester: typeof agentCoreTester; + agentImprovement: typeof agentImprovement; + agentTemplates: typeof agentTemplates; agentValidation: typeof agentValidation; agentcoreDeployment: typeof agentcoreDeployment; agentcoreMCP: typeof agentcoreMCP; @@ -103,9 +133,11 @@ declare const fullApi: ApiFromModules<{ agentcoreTestExecution: typeof agentcoreTestExecution; agents: typeof agents; apiKeys: typeof apiKeys; + auditLogs: typeof auditLogs; auth: typeof auth; authDebug: typeof authDebug; authErrorHandler: typeof authErrorHandler; + automatedAgentBuilder: typeof automatedAgentBuilder; awsAuth: typeof awsAuth; awsCrossAccount: typeof awsCrossAccount; awsDeployment: typeof awsDeployment; @@ -119,6 +151,7 @@ declare const fullApi: ApiFromModules<{ cognitoAuth: typeof cognitoAuth; constants: typeof constants; containerOrchestrator: typeof containerOrchestrator; + conversationAnalysis: typeof conversationAnalysis; conversations: typeof conversations; crons: typeof crons; debuggingAgent: typeof debuggingAgent; @@ -136,9 +169,18 @@ declare const fullApi: ApiFromModules<{ "lib/aws/cloudwatchClient": typeof lib_aws_cloudwatchClient; "lib/aws/ecsClient": typeof lib_aws_ecsClient; "lib/aws/s3Client": typeof lib_aws_s3Client; + "lib/bedrockGate": typeof lib_bedrockGate; "lib/cloudFormationGenerator": typeof lib_cloudFormationGenerator; + "lib/dynamicModelSwitching": typeof lib_dynamicModelSwitching; "lib/fileGenerators": typeof lib_fileGenerators; - "lib/stateValidation": typeof lib_stateValidation; + "lib/memoryStore": typeof lib_memoryStore; + "lib/messageExecutor": typeof lib_messageExecutor; + "lib/roles": typeof lib_roles; + "lib/strandsTools": typeof lib_strandsTools; + "lib/tierConfig": typeof lib_tierConfig; + "lib/tokenBilling": typeof lib_tokenBilling; + "lib/unifiedModalitySwitching": typeof lib_unifiedModalitySwitching; + localModelDetector: typeof localModelDetector; maintenance: typeof maintenance; mcpClient: typeof mcpClient; mcpConfig: typeof mcpConfig; @@ -150,25 +192,61 @@ declare const fullApi: ApiFromModules<{ metaTooling: typeof metaTooling; modelBenchmarks: typeof modelBenchmarks; modelRegistry: typeof modelRegistry; + multiAgentRuntime: typeof multiAgentRuntime; + ollamaInstaller: typeof ollamaInstaller; + ollamaMCPIntegration: typeof ollamaMCPIntegration; packageMutations: typeof packageMutations; + platformValue: typeof platformValue; + promptChainExecutor: typeof promptChainExecutor; queueProcessor: typeof queueProcessor; + rateLimiter: typeof rateLimiter; realAgentTesting: typeof realAgentTesting; router: typeof router; strandsAgentExecution: typeof strandsAgentExecution; + strandsAgentExecutionDynamic: typeof strandsAgentExecutionDynamic; + strandsAgentsTools: typeof strandsAgentsTools; + stripe: typeof stripe; + stripeMutations: typeof stripeMutations; + swarmTestingOrchestrator: typeof swarmTestingOrchestrator; templates: typeof templates; testExecution: typeof testExecution; toolRegistry: typeof toolRegistry; + tools: typeof tools; "types/tools": typeof types_tools; + unifiedAgentExecution: typeof unifiedAgentExecution; + unifiedAgentExecutionTest: typeof unifiedAgentExecutionTest; + unifiedUserIdentity: typeof unifiedUserIdentity; userAWSAccounts: typeof userAWSAccounts; + users: typeof users; + workflowExecutor: typeof workflowExecutor; + workflowTemplateSeeder: typeof workflowTemplateSeeder; + workflowTemplates: typeof workflowTemplates; + workflows: typeof workflows; }>; -declare const fullApiWithMounts: typeof fullApi; +/** + * A utility for referencing Convex functions in your app's public API. + * + * Usage: + * ```js + * const myFunctionReference = api.myModule.myFunction; + * ``` + */ export declare const api: FilterApi< - typeof fullApiWithMounts, + typeof fullApi, FunctionReference >; + +/** + * A utility for referencing Convex functions in your app's internal API. + * + * Usage: + * ```js + * const myFunctionReference = internal.myModule.myFunction; + * ``` + */ export declare const internal: FilterApi< - typeof fullApiWithMounts, + typeof fullApi, FunctionReference >; diff --git a/convex/_generated/dataModel.d.ts b/convex/_generated/dataModel.d.ts index 8541f31..f97fd19 100644 --- a/convex/_generated/dataModel.d.ts +++ b/convex/_generated/dataModel.d.ts @@ -38,7 +38,7 @@ export type Doc = DocumentByName< * Convex documents are uniquely identified by their `Id`, which is accessible * on the `_id` field. To learn more, see [Document IDs](https://docs.convex.dev/using/document-ids). * - * Documents can be loaded using `db.get(id)` in query and mutation functions. + * Documents can be loaded using `db.get(tableName, id)` in query and mutation functions. * * IDs are just strings at runtime, but this type can be used to distinguish them from other * strings when type checking. diff --git a/convex/_generated/server.d.ts b/convex/_generated/server.d.ts index b5c6828..bec05e6 100644 --- a/convex/_generated/server.d.ts +++ b/convex/_generated/server.d.ts @@ -10,7 +10,6 @@ import { ActionBuilder, - AnyComponents, HttpActionBuilder, MutationBuilder, QueryBuilder, @@ -19,15 +18,9 @@ import { GenericQueryCtx, GenericDatabaseReader, GenericDatabaseWriter, - FunctionReference, } from "convex/server"; import type { DataModel } from "./dataModel.js"; -type GenericCtx = - | GenericActionCtx - | GenericMutationCtx - | GenericQueryCtx; - /** * Define a query in this Convex app's public API. * @@ -92,11 +85,12 @@ export declare const internalAction: ActionBuilder; /** * Define an HTTP action. * - * This function will be used to respond to HTTP requests received by a Convex - * deployment if the requests matches the path and method where this action - * is routed. Be sure to route your action in `convex/http.js`. + * The wrapped function will be used to respond to HTTP requests received + * by a Convex deployment if the requests matches the path and method where + * this action is routed. Be sure to route your httpAction in `convex/http.js`. * - * @param func - The function. It receives an {@link ActionCtx} as its first argument. + * @param func - The function. It receives an {@link ActionCtx} as its first argument + * and a Fetch API `Request` object as its second. * @returns The wrapped function. Import this function from `convex/http.js` and route it to hook it up. */ export declare const httpAction: HttpActionBuilder; diff --git a/convex/_generated/server.js b/convex/_generated/server.js index 4a21df4..bf3d25a 100644 --- a/convex/_generated/server.js +++ b/convex/_generated/server.js @@ -16,7 +16,6 @@ import { internalActionGeneric, internalMutationGeneric, internalQueryGeneric, - componentsGeneric, } from "convex/server"; /** @@ -81,10 +80,14 @@ export const action = actionGeneric; export const internalAction = internalActionGeneric; /** - * Define a Convex HTTP action. + * Define an HTTP action. * - * @param func - The function. It receives an {@link ActionCtx} as its first argument, and a `Request` object - * as its second. - * @returns The wrapped endpoint function. Route a URL path to this function in `convex/http.js`. + * The wrapped function will be used to respond to HTTP requests received + * by a Convex deployment if the requests matches the path and method where + * this action is routed. Be sure to route your httpAction in `convex/http.js`. + * + * @param func - The function. It receives an {@link ActionCtx} as its first argument + * and a Fetch API `Request` object as its second. + * @returns The wrapped function. Import this function from `convex/http.js` and route it to hook it up. */ export const httpAction = httpActionGeneric; diff --git a/convex/agentAsToolGenerator.ts b/convex/agentAsToolGenerator.ts new file mode 100644 index 0000000..4131bd2 --- /dev/null +++ b/convex/agentAsToolGenerator.ts @@ -0,0 +1,299 @@ +/** + * Agent-as-Tool Generator + * + * Generates @tool decorated functions that wrap other agents, + * enabling hierarchical agent architectures and coordination. + */ + +import { action } from "./_generated/server"; +import { v } from "convex/values"; +import { api } from "./_generated/api"; +import type { Id } from "./_generated/dataModel"; + +/** + * Generate agent-as-tool wrapper code + */ +export const generateAgentAsTool = action( { + args: { + agentId: v.id( "agents" ), + }, + handler: async ( ctx, args ): Promise<{ toolName: string; toolCode: string; importStatement: string }> => { + const agent: any = await ctx.runQuery( api.agents.get, { id: args.agentId } ); + if ( !agent ) throw new Error( "Agent not found" ); + + const toolName: string = agent.name.replaceAll( /[^a-zA-Z0-9_]/g, '_' ).toLowerCase(); + const toolCode = generateToolCode( agent.name, toolName, agent.description || "", args.agentId ); + + return { + toolName, + toolCode, + importStatement: `from tools.${toolName} import ${toolName}`, + }; + }, +} ); + +/** + * Generate tool wrapper code for an agent + */ +function generateToolCode( + agentName: string, + toolName: string, + description: string, + agentId: string +): string { + // Sanitize inputs to prevent template injection in generated Python code + const sanitize = ( s: string ) => s.replaceAll( '\\', "\\\\" ).replaceAll( '"""', String.raw`\"\"\"` ); + const safeAgentName = sanitize( agentName ); + return `""" +Agent-as-Tool: ${safeAgentName} +Auto-generated wrapper to use ${safeAgentName} as a tool in other agents. +""" + +from strandsagents import tool +import os +import requests +from typing import Optional + +@tool( + name="${toolName}", + description="${description || `Invoke ${agentName} agent to handle specialized tasks`}", + parameters={ + "task": { + "type": "string", + "description": "The task or question to send to ${agentName}", + "required": True + }, + "context": { + "type": "object", + "description": "Optional context to pass to the agent", + "required": False + } + } +) +async def ${toolName}(task: str, context: Optional[dict] = None) -> str: + """ + Invoke ${agentName} agent as a tool. + + This allows hierarchical agent architectures where one agent + can delegate tasks to specialized agents. + + Args: + task: The task or question for ${agentName} + context: Optional context dictionary + + Returns: + str: Response from ${agentName} + """ + try: + # Get platform API endpoint from environment + api_url = os.getenv("PLATFORM_API_URL", "https://api.mikepfunk.com") + + # Call platform API to execute agent + response = requests.post( + f"{api_url}/execute-agent", + json={ + "agentId": "${agentId}", + "message": task, + "context": context or {} + }, + headers={ + "Authorization": f"Bearer {os.getenv('PLATFORM_API_KEY')}", + "Content-Type": "application/json" + }, + timeout=300 # 5 minute timeout + ) + + response.raise_for_status() + result = response.json() + + if result.get("success"): + return result.get("content", "") + else: + return f"Error from ${agentName}: {result.get('error', 'Unknown error')}" + + except Exception as e: + return f"Failed to invoke ${agentName}: {str(e)}" +`; +} + +/** + * Generate coordinator agent that uses other agents as tools + */ +export const generateCoordinatorAgent = action( { + args: { + coordinatorName: v.string(), + coordinatorPrompt: v.string(), + agentIds: v.array( v.id( "agents" ) ), + coordinationStrategy: v.union( + v.literal( "sequential" ), + v.literal( "parallel" ), + v.literal( "dynamic" ) + ), + }, + handler: async ( ctx, args ): Promise<{ coordinatorCode: string; agentTools: Array<{ name: string; agentId: string; agentName: string; description: string }> }> => { + // Get all agents + const agents: any[] = await Promise.all( + args.agentIds.map( ( id: Id<"agents"> ) => ctx.runQuery( api.agents.get, { id } ) ) + ); + + // Generate tool wrappers for each agent + const agentTools: Array<{ name: string; agentId: string; agentName: string; description: string }> = agents.map( ( agent: any ) => { + if ( !agent ) return null; + const toolName: string = agent.name.replaceAll( /[\W]/g, '_' ).toLowerCase(); + return { + name: toolName, + agentId: agent._id, + agentName: agent.name, + description: agent.description || `Invoke ${agent.name} agent`, + }; + } ).filter( Boolean ) as Array<{ name: string; agentId: string; agentName: string; description: string }>; + + // Generate coordinator agent code + const coordinatorCode = generateCoordinatorCode( + args.coordinatorName, + args.coordinatorPrompt, + agentTools, + args.coordinationStrategy + ); + + return { + coordinatorCode, + agentTools, + }; + }, +} ); + +/** + * Generate coordinator agent code + */ +function generateCoordinatorCode( + name: string, + systemPrompt: string, + agentTools: Array<{ name: string; agentId: string; agentName: string; description: string }>, + strategy: string +): string { + const toolImports = agentTools.map( t => `from tools.${t.name} import ${t.name}` ).join( '\n' ); + const toolList = agentTools.map( t => t.name ).join( ', ' ); + + return String.raw`""" +Coordinator Agent: ${name} +Coordinates multiple specialized agents to solve complex tasks. +""" + +from strandsagents import agent, Agent +from bedrock_agentcore.runtime import BedrockAgentCoreApp +import asyncio +import logging + +# Import agent tools +${toolImports} + +logger = logging.getLogger(__name__) + +@agent( + model="anthropic.claude-sonnet-4.5-v2", + system_prompt="""${systemPrompt} + +You are a coordinator agent with access to specialized agents as tools: +${agentTools.map( t => `- ${t.name}: ${t.description}` ).join( '\n' )} + +Coordination Strategy: ${strategy} +${strategy === 'sequential' ? '- Execute agents one after another, passing output forward' : ''} +${strategy === 'parallel' ? '- Execute multiple agents simultaneously when tasks are independent' : ''} +${strategy === 'dynamic' ? '- Decide dynamically which agents to use and in what order' : ''} + +When delegating to agents: +1. Clearly define the task for each agent +2. Provide necessary context +3. Synthesize results from multiple agents +4. Return a coherent final response +""", + tools=[${toolList}], + memory=True, + reasoning="interleaved" +) +class ${name.replaceAll( /[^a-zA-Z0-9]/g, '' )}Coordinator(Agent): + """ + Coordinator agent that orchestrates multiple specialized agents. + """ + + async def coordinate_sequential(self, task: str, agents: list) -> str: + """Execute agents sequentially, passing output forward.""" + result = task + for agent_tool in agents: + logger.info(f"Executing {agent_tool.__name__} with input: {result[:100]}...") + result = await agent_tool(task=result) + return result + + async def coordinate_parallel(self, task: str, agents: list) -> str: + """Execute agents in parallel and aggregate results.""" + logger.info(f"Executing {len(agents)} agents in parallel...") + results = await asyncio.gather(*[ + agent_tool(task=task) for agent_tool in agents + ]) + + # Synthesize results + synthesis = "Results from parallel execution:\n\n" + for i, result in enumerate(results): + synthesis += f"Agent {i+1}:\n{result}\n\n" + + return synthesis + +# AgentCore Runtime Setup +app = BedrockAgentCoreApp() +coordinator = None + +@app.entrypoint +async def agent_entrypoint(payload): + global coordinator + + if coordinator is None: + coordinator = ${name.replaceAll( /[^a-zA-Z0-9]/g, '' )}Coordinator() + + user_input = payload.get("prompt", "") + return await coordinator.run(user_input) + +if __name__ == "__main__": + app.run() +`; +} + +/** + * Link agents together for coordination + */ +export const linkAgentsForCoordination = action( { + args: { + parentAgentId: v.id( "agents" ), + childAgentIds: v.array( v.id( "agents" ) ), + }, + handler: async ( ctx, args ): Promise<{ success: boolean; parentAgentId: Id<"agents">; childTools: Array<{ name: string; type: string; config: { agentId: Id<"agents">; agentName: string; description: string } }> }> => { + // Update parent agent to include child agents as tools + const parent: any = await ctx.runQuery( api.agents.get, { id: args.parentAgentId } ); + if ( !parent ) throw new Error( "Parent agent not found" ); + + // Generate tool wrappers for child agents + const childTools: Array<{ name: string; type: string; config: { agentId: Id<"agents">; agentName: string; description: string } }> = ( await Promise.all( + args.childAgentIds.map( async ( childId: Id<"agents"> ) => { + const child: any = await ctx.runQuery( api.agents.get, { id: childId } ); + if ( !child ) return null; + + const toolName: string = child.name.replaceAll( /[\W]/g, '_' ).toLowerCase(); + return { + name: toolName, + type: "agent_tool", + config: { + agentId: childId, + agentName: child.name, + description: child.description || `Invoke ${child.name} agent`, + }, + }; + } ) + ) ).filter( Boolean ) as Array<{ name: string; type: string; config: { agentId: Id<"agents">; agentName: string; description: string } }>; + + return { + success: true, + parentAgentId: args.parentAgentId, + childTools, + }; + }, +} ); diff --git a/convex/agentBuilderWorkflow.ts b/convex/agentBuilderWorkflow.ts index 40c9eb9..e65b29a 100644 --- a/convex/agentBuilderWorkflow.ts +++ b/convex/agentBuilderWorkflow.ts @@ -2,28 +2,28 @@ /** * Agent Builder Workflow System - * + * * Multi-stage prompt chaining workflow that guides Claude through * intelligent agent design and implementation. */ import { v } from "convex/values"; import { action } from "./_generated/server"; -import { api } from "./_generated/api"; +import { api, internal } from "./_generated/api"; import { BedrockRuntimeClient, InvokeModelCommand } from "@aws-sdk/client-bedrock-runtime"; const WORKFLOW_MODEL_ID = process.env.BEDROCK_WORKFLOW_MODEL_ID || process.env.DEFAULT_BEDROCK_MODEL_ID || - "us.anthropic.claude-3-5-haiku-20241022-v1:0"; + "anthropic.claude-haiku-4-5-20251001-v1:0"; const WORKFLOW_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || "us-east-1"; -const bedrockClient = new BedrockRuntimeClient({ +const bedrockClient = new BedrockRuntimeClient( { region: WORKFLOW_REGION, -}); +} ); type WorkflowModelPayload = { stageName: string; @@ -177,38 +177,58 @@ Output: /** * Execute a single workflow stage */ -export const executeWorkflowStage = action({ +export const executeWorkflowStage = action( { args: { stage: v.string(), userInput: v.string(), - previousContext: v.optional(v.array(v.object({ + previousContext: v.optional( v.array( v.object( { stage: v.string(), output: v.string() - }))), - conversationId: v.optional(v.string()) + } ) ) ), + conversationId: v.optional( v.string() ) }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { + // Gate: enforce tier-based Bedrock access + const { requireBedrockAccess } = await import( "./lib/bedrockGate" ); + const gateResult = await requireBedrockAccess( + ctx, WORKFLOW_MODEL_ID, + async ( lookupArgs ) => ctx.runQuery( internal.users.getInternal, lookupArgs ), + ); + if ( !gateResult.allowed ) { + throw new Error( gateResult.reason ); + } + const stage = WORKFLOW_STAGES[args.stage as keyof typeof WORKFLOW_STAGES]; - if (!stage) { - throw new Error(`Invalid workflow stage: ${args.stage}`); + if ( !stage ) { + throw new Error( `Invalid workflow stage: ${args.stage}` ); } // Build context from previous stages let contextPrompt = ""; - if (args.previousContext && args.previousContext.length > 0) { + if ( args.previousContext && args.previousContext.length > 0 ) { contextPrompt = "\n\nPREVIOUS WORKFLOW OUTPUTS:\n\n"; - for (const ctx of args.previousContext) { - contextPrompt += `=== ${ctx.stage.toUpperCase()} ===\n${ctx.output}\n\n`; + for ( const prevCtx of args.previousContext ) { + contextPrompt += `=== ${prevCtx.stage.toUpperCase()} ===\n${prevCtx.output}\n\n`; } } const fullPrompt = `${contextPrompt}USER REQUEST:\n${args.userInput}\n\nYour task: ${stage.systemPrompt}`; - const result = await invokeWorkflowModel({ + const result = await invokeWorkflowModel( { stageName: stage.name, systemPrompt: stage.systemPrompt, userPrompt: fullPrompt, - }); + } ); + + // Meter: token-based billing for this workflow stage + if ( result.inputTokens > 0 || result.outputTokens > 0 ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateResult.userId as any, + modelId: WORKFLOW_MODEL_ID, + inputTokens: result.inputTokens, + outputTokens: result.outputTokens, + } ); + } return { stage: stage.name, @@ -219,9 +239,9 @@ export const executeWorkflowStage = action({ } }; } -}); +} ); -async function invokeWorkflowModel(payload: WorkflowModelPayload): Promise { +async function invokeWorkflowModel( payload: WorkflowModelPayload ): Promise { const { stageName, systemPrompt, userPrompt } = payload; const requestBody = { @@ -242,26 +262,26 @@ async function invokeWorkflowModel(payload: WorkflowModelPayload): Promise block?.type === "text" && typeof block.text === "string") - .map((block: any) => block.text as string) - .join("\n\n") + .filter( ( block: any ) => block?.type === "text" && typeof block.text === "string" ) + .map( ( block: any ) => block.text as string ) + .join( "\n\n" ) .trim(); - if (!outputText) { - throw new Error("Bedrock response did not include text content"); + if ( !outputText ) { + throw new Error( "Bedrock response did not include text content" ); } const usage = json.usage ?? {}; @@ -273,13 +293,13 @@ async function invokeWorkflowModel(payload: WorkflowModelPayload): Promise { + handler: async ( ctx, args ) => { + // Gate: enforce tier-based Bedrock access + const { requireBedrockAccess } = await import( "./lib/bedrockGate" ); + const gateResult = await requireBedrockAccess( + ctx, WORKFLOW_MODEL_ID, + async ( lookupArgs ) => ctx.runQuery( internal.users.getInternal, lookupArgs ), + ); + if ( !gateResult.allowed ) { + throw new Error( gateResult.reason ); + } + const workflowResults: Array<{ stage: string; output: string; @@ -303,7 +333,7 @@ export const executeCompleteWorkflow = action({ const stages = [ "REQUIREMENTS", - "ARCHITECTURE", + "ARCHITECTURE", "TOOL_DESIGN", "IMPLEMENTATION", "CODE_GENERATION", @@ -311,26 +341,26 @@ export const executeCompleteWorkflow = action({ ]; // Execute each stage sequentially, passing context forward - for (const stageName of stages) { - const result = await ctx.runAction(api.agentBuilderWorkflow.executeWorkflowStage, { + for ( const stageName of stages ) { + const result = await ctx.runAction( api.agentBuilderWorkflow.executeWorkflowStage, { stage: stageName, userInput: args.userRequest, - previousContext: workflowResults.map(r => ({ + previousContext: workflowResults.map( r => ( { stage: r.stage, output: r.output - })), + } ) ), conversationId: args.conversationId - }); + } ); - workflowResults.push(result); + workflowResults.push( result ); } // Calculate total usage const totalUsage = workflowResults.reduce( - (acc, r) => ({ + ( acc, r ) => ( { inputTokens: acc.inputTokens + r.usage.inputTokens, outputTokens: acc.outputTokens + r.usage.outputTokens - }), + } ), { inputTokens: 0, outputTokens: 0 } ); @@ -341,26 +371,36 @@ export const executeCompleteWorkflow = action({ finalOutput: workflowResults[workflowResults.length - 1].output }; } -}); +} ); /** * Stream workflow execution with real-time updates */ -export const streamWorkflowExecution = action({ +export const streamWorkflowExecution = action( { args: { userRequest: v.string(), - conversationId: v.optional(v.string()) + conversationId: v.optional( v.string() ) }, - handler: async (ctx, args) => { - const Anthropic = (await import("@anthropic-ai/sdk")).default; - const anthropic = new Anthropic({ + handler: async ( ctx, args ) => { + // Gate: enforce tier-based Bedrock access + const { requireBedrockAccess } = await import( "./lib/bedrockGate" ); + const gateResult = await requireBedrockAccess( + ctx, WORKFLOW_MODEL_ID, + async ( lookupArgs ) => ctx.runQuery( internal.users.getInternal, lookupArgs ), + ); + if ( !gateResult.allowed ) { + throw new Error( gateResult.reason ); + } + + const Anthropic = ( await import( "@anthropic-ai/sdk" ) ).default; + const anthropic = new Anthropic( { apiKey: process.env.ANTHROPIC_API_KEY, - }); + } ); const stages = [ "REQUIREMENTS", "ARCHITECTURE", - "TOOL_DESIGN", + "TOOL_DESIGN", "IMPLEMENTATION", "CODE_GENERATION", "VALIDATION" @@ -368,22 +408,22 @@ export const streamWorkflowExecution = action({ const workflowContext: Array<{ stage: string; output: string }> = []; - for (const stageName of stages) { + for ( const stageName of stages ) { const stage = WORKFLOW_STAGES[stageName as keyof typeof WORKFLOW_STAGES]; - + // Build context let contextPrompt = ""; - if (workflowContext.length > 0) { + if ( workflowContext.length > 0 ) { contextPrompt = "\n\nPREVIOUS WORKFLOW OUTPUTS:\n\n"; - for (const ctx of workflowContext) { - contextPrompt += `=== ${ctx.stage.toUpperCase()} ===\n${ctx.output}\n\n`; + for ( const prevCtx of workflowContext ) { + contextPrompt += `=== ${prevCtx.stage.toUpperCase()} ===\n${prevCtx.output}\n\n`; } } const fullPrompt = `${contextPrompt}USER REQUEST:\n${args.userRequest}\n\nYour task: ${stage.systemPrompt}`; // Stream this stage - const stream = await anthropic.messages.create({ + const stream = await anthropic.messages.create( { model: "claude-3-7-sonnet-20250219", max_tokens: 8000, temperature: 0.7, @@ -393,21 +433,21 @@ export const streamWorkflowExecution = action({ content: fullPrompt }], stream: true - }); + } ); let stageOutput = ""; - - for await (const event of stream) { - if (event.type === "content_block_delta" && - event.delta.type === "text_delta") { + + for await ( const event of stream ) { + if ( event.type === "content_block_delta" && + event.delta.type === "text_delta" ) { stageOutput += event.delta.text; } } - workflowContext.push({ + workflowContext.push( { stage: stage.name, output: stageOutput - }); + } ); } return { @@ -415,4 +455,4 @@ export const streamWorkflowExecution = action({ workflow: workflowContext }; } -}); +} ); diff --git a/convex/agentExecution.test.ts b/convex/agentExecution.test.ts index e885259..463efc8 100644 --- a/convex/agentExecution.test.ts +++ b/convex/agentExecution.test.ts @@ -28,11 +28,10 @@ describe("Agent Execution Infrastructure", () => { // Create test user testUserId = await t.run(async (ctx: any) => { return await ctx.db.insert("users", { - userId: "test-user-execution", email: "test@execution.com", name: "Test User", tier: "personal", - testsThisMonth: 0, + executionsThisMonth: 0, createdAt: Date.now(), isAnonymous: false, }); @@ -825,16 +824,16 @@ class BedrockTestAgent(Agent): return await ctx.db.get(testUserId); }); - const initialCount = user.testsThisMonth || 0; + const initialCount = user.executionsThisMonth || 0; await t.mutation(api.testExecution.submitTest, { agentId: ollamaAgentId, testQuery: "test", }); - // In production, this would increment testsThisMonth + // In production, this would increment executionsThisMonth // For now, we just verify the field exists - expect(user.testsThisMonth).toBeDefined(); + expect(user.executionsThisMonth).toBeDefined(); }); }); diff --git a/convex/agentImprovement.ts b/convex/agentImprovement.ts new file mode 100644 index 0000000..5a7be84 --- /dev/null +++ b/convex/agentImprovement.ts @@ -0,0 +1,275 @@ +/** + * Agent Improvement & Auto-Update System + * + * Handles automatic agent improvements based on conversation analysis. + * UPDATES existing agents (does not create new ones). + */ + +import { v } from "convex/values"; +import { mutation, action, query, internalMutation } from "./_generated/server"; +import { api, internal } from "./_generated/api"; +import { Id } from "./_generated/dataModel"; + +/** + * Apply improvement plan to an existing agent + * CRITICAL: This UPDATES the agent, does not create a new one + */ +export const applyImprovementPlan = action({ + args: { + agentId: v.id("agents"), + conversationId: v.id("conversations"), + improvementPlan: v.any(), + }, + handler: async (ctx: any, args: { agentId: Id<"agents">; conversationId: Id<"conversations">; improvementPlan: any }): Promise<{ success: boolean; agentId: Id<"agents">; changes: string[]; message: string }> => { + // Get current agent + const agent: any = await ctx.runQuery(api.agents.get, { id: args.agentId }); + if (!agent) { + throw new Error(`Agent ${args.agentId} not found`); + } + + // Get conversation to verify it belongs to this agent + const conversation: any = await ctx.runQuery(api.conversations.get, { conversationId: args.conversationId }); + if (!conversation) { + throw new Error("Conversation not found"); + } + + if (conversation.agentId !== args.agentId) { + throw new Error( + `CRITICAL ERROR: Conversation belongs to agent ${conversation.agentId}, but trying to improve agent ${args.agentId}` + ); + } + + console.log(`✅ Verified: Improving agent ${args.agentId} using conversation ${args.conversationId}`); + + // Generate updated agent configuration + const updatedConfig = generateUpdatedAgentConfig(agent, args.improvementPlan); + + // Update the agent (NOT create new) + await ctx.runMutation(api.agents.update, { + id: args.agentId, + name: agent.name, // Keep same name + description: updatedConfig.description, + systemPrompt: updatedConfig.systemPrompt, + tools: updatedConfig.tools ?? agent.tools, // Use updated tools if provided, else keep existing + model: updatedConfig.model, + // Note: lastImprovedAt and improvementSource would need to be added to schema + }); + + // Log improvement history + await ctx.runMutation(internal.agentImprovement.logImprovement, { + agentId: args.agentId, + conversationId: args.conversationId, + improvementPlan: args.improvementPlan, + changes: updatedConfig.changes, + }); + + return { + success: true, + agentId: args.agentId, // Same agent ID (updated, not created) + changes: updatedConfig.changes, + message: `Agent "${agent.name}" has been improved based on conversation analysis`, + }; + }, +}); + +/** + * Generate updated agent configuration based on improvement plan + */ +function generateUpdatedAgentConfig(agent: any, improvementPlan: any) { + const changes: string[] = []; + let systemPrompt = agent.systemPrompt || ""; + let description = agent.description || ""; + let tools = agent.tools || []; + let model = agent.model; + + // Apply improvements based on type + for (const improvement of improvementPlan.recommendedChanges || []) { + switch (improvement.type) { + case "add_tool": + // Extract tool name from description + const toolMatch = improvement.description.match(/Add capability: (.+)/i); + if (toolMatch) { + const toolName = toolMatch[1].trim(); + if (!tools.includes(toolName)) { + tools.push(toolName); + changes.push(`Added tool: ${toolName}`); + } + } + break; + + case "modify_prompt": + // Improve system prompt clarity + if (improvement.description.includes("clearer instructions")) { + systemPrompt += "\n\n# Additional Instructions\n"; + systemPrompt += "- Be clear and specific in responses\n"; + systemPrompt += "- Confirm understanding before proceeding\n"; + systemPrompt += "- Ask for clarification if user request is ambiguous\n"; + changes.push("Enhanced system prompt with clearer instructions"); + } + break; + + case "improve_error_handling": + // Add error handling instructions + systemPrompt += "\n\n# Error Handling\n"; + systemPrompt += "- Provide user-friendly error messages\n"; + systemPrompt += "- Suggest alternatives when operations fail\n"; + systemPrompt += "- Never expose technical error details to users\n"; + changes.push("Improved error handling in system prompt"); + break; + + case "change_model": + // Extract recommended model + const modelMatch = improvement.implementation.match(/(claude-[^\s]+)/i); + if (modelMatch) { + const newModel = modelMatch[1]; + if (newModel !== model) { + model = newModel; + changes.push(`Changed model from ${agent.model} to ${newModel}`); + } + } + break; + + case "add_memory": + // Add memory tool + if (!tools.includes("memory")) { + tools.push("memory"); + changes.push("Added memory capability"); + } + break; + } + } + + // Update description with improvements + if (changes.length > 0) { + description += `\n\n[Auto-improved based on conversation analysis]`; + } + + return { + systemPrompt, + description, + tools, + model, + changes, + }; +} + +/** + * Log improvement history (internal mutation) + */ +export const logImprovement = internalMutation({ + args: { + agentId: v.id("agents"), + conversationId: v.id("conversations"), + improvementPlan: v.any(), + changes: v.array(v.string()), + }, + handler: async (ctx, args) => { + await ctx.db.insert("agentImprovementHistory", { + agentId: args.agentId, + conversationId: args.conversationId, + improvementPlan: args.improvementPlan, + changes: args.changes, + appliedAt: Date.now(), + }); + }, +}); + +/** + * Get improvement history for an agent + */ +export const getImprovementHistory = query({ + args: { agentId: v.id("agents") }, + handler: async (ctx: any, args: { agentId: Id<"agents"> }): Promise => { + return await ctx.db + .query("agentImprovementHistory") + .withIndex("by_agent", (q: any) => q.eq("agentId", args.agentId)) + .order("desc") + .collect(); + }, +}); + +/** + * Automatic improvement workflow + * Analyzes conversation and applies improvements in one step + */ +export const autoImproveAgent = action({ + args: { + agentId: v.id("agents"), + conversationId: v.id("conversations"), + }, + handler: async (ctx: any, args: { agentId: Id<"agents">; conversationId: Id<"conversations"> }): Promise => { + // Verify conversation belongs to agent + const conversation: any = await ctx.runQuery(api.conversations.get, { conversationId: args.conversationId }); + if (!conversation) { + throw new Error("Conversation not found"); + } + + if (conversation.agentId !== args.agentId) { + throw new Error( + `Cannot improve agent ${args.agentId} using conversation from agent ${conversation.agentId}` + ); + } + + console.log(`🤖 Auto-improving agent ${args.agentId} from conversation ${args.conversationId}`); + + // Generate improvement plan + const improvementPlan: any = await ctx.runAction( + api.conversationAnalysis.generateImprovementPlan, + { conversationId: args.conversationId } + ); + + // Apply improvements + const result: any = await ctx.runAction(api.agentImprovement.applyImprovementPlan, { + agentId: args.agentId, + conversationId: args.conversationId, + improvementPlan, + }); + + return { + ...result, + improvementPlan, + }; + }, +}); + +/** + * Preview improvements without applying them + */ +export const previewImprovements = action({ + args: { + agentId: v.id("agents"), + conversationId: v.id("conversations"), + }, + handler: async (ctx: any, args: { agentId: Id<"agents">; conversationId: Id<"conversations"> }): Promise => { + // Verify conversation belongs to agent + const conversation: any = await ctx.runQuery(api.conversations.get, { conversationId: args.conversationId }); + if (!conversation || conversation.agentId !== args.agentId) { + throw new Error("Invalid conversation or agent mismatch"); + } + + // Get agent + const agent: any = await ctx.runQuery(api.agents.get, { id: args.agentId }); + if (!agent) { + throw new Error("Agent not found"); + } + + // Generate improvement plan + const improvementPlan: any = await ctx.runAction( + api.conversationAnalysis.generateImprovementPlan, + { conversationId: args.conversationId } + ); + + // Generate preview (don't apply) + const updatedConfig = generateUpdatedAgentConfig(agent, improvementPlan); + + return { + currentAgent: { + systemPrompt: agent.systemPrompt, + tools: agent.tools, + model: agent.model, + }, + proposedChanges: updatedConfig, + improvementPlan, + }; + }, +}); diff --git a/convex/agentTemplates.ts b/convex/agentTemplates.ts new file mode 100644 index 0000000..12e267c --- /dev/null +++ b/convex/agentTemplates.ts @@ -0,0 +1,110 @@ +/** + * Pre-built Agent Templates + */ + +import { query } from "./_generated/server"; +import { v } from "convex/values"; + +export const AGENT_TEMPLATES = { + negotiation: { + name: "Negotiation Agent", + description: "Multi-round negotiation with memory and strategy", + model: "anthropic.claude-sonnet-4-5-20250929-v1:0", + systemPrompt: `You are an expert negotiator. Your goal is to reach mutually beneficial agreements. + +NEGOTIATION FRAMEWORK: +1. Understand both parties' interests and constraints +2. Identify areas of common ground +3. Propose creative solutions that maximize value for both sides +4. Use principled negotiation tactics (separate people from problem, focus on interests not positions) +5. Track concessions and maintain fairness + +MEMORY: Remember all previous offers, counteroffers, and stated interests. + +STRATEGY: +- Start with anchoring (first offer sets expectations) +- Use reciprocity (match concessions) +- Create value before claiming it +- Know your BATNA (Best Alternative To Negotiated Agreement) + +Always be respectful, transparent about constraints, and aim for win-win outcomes.`, + tools: ["memory", "calculator", "current_time"], + capabilities: ["multi-turn", "memory", "reasoning"], + useCase: "Contract negotiations, salary discussions, vendor agreements" + }, + + research: { + name: "Research Agent", + description: "Web research with source tracking", + model: "anthropic.claude-sonnet-4-5-20250929-v1:0", + systemPrompt: `You are a research assistant. Gather information from multiple sources and synthesize findings. + +RESEARCH PROCESS: +1. Break down research question into sub-questions +2. Search for relevant information +3. Evaluate source credibility +4. Synthesize findings +5. Cite all sources + +Always provide evidence-based answers with proper citations.`, + tools: ["http_request", "memory", "file_write"], + capabilities: ["web-search", "analysis", "documentation"], + useCase: "Market research, competitive analysis, literature reviews" + }, + + customer_service: { + name: "Customer Service Agent", + description: "Handle customer inquiries with empathy", + model: "anthropic.claude-haiku-4-5-20251001-v1:0", + systemPrompt: `You are a customer service representative. Help customers efficiently and empathetically. + +CUSTOMER SERVICE PRINCIPLES: +1. Listen actively and acknowledge concerns +2. Provide clear, actionable solutions +3. Escalate when necessary +4. Follow up on unresolved issues +5. Maintain professional tone + +Remember customer history and preferences.`, + tools: ["memory", "http_request", "slack"], + capabilities: ["conversation", "memory", "integration"], + useCase: "Support tickets, FAQ responses, issue resolution" + }, + + code_reviewer: { + name: "Code Review Agent", + description: "Review code for quality and security", + model: "anthropic.claude-sonnet-4-5-20250929-v1:0", + systemPrompt: `You are a senior code reviewer. Analyze code for quality, security, and best practices. + +REVIEW CHECKLIST: +1. Security vulnerabilities +2. Performance issues +3. Code maintainability +4. Test coverage +5. Documentation quality +6. Design patterns + +Provide constructive feedback with specific examples.`, + tools: ["file_read", "editor", "python_repl"], + capabilities: ["code-analysis", "security", "best-practices"], + useCase: "Pull request reviews, security audits, refactoring" + } +}; + +export const getAgentTemplates = query( { + args: {}, + handler: async () => { + return Object.entries( AGENT_TEMPLATES ).map( ( [id, template] ) => ( { + id, + ...template + } ) ); + } +} ); + +export const getAgentTemplate = query( { + args: { templateId: v.string() }, + handler: async ( ctx, args ) => { + return AGENT_TEMPLATES[args.templateId as keyof typeof AGENT_TEMPLATES] || null; + } +} ); diff --git a/convex/agentcoreSetup.ts b/convex/agentcoreSetup.ts index 1864212..5f95376 100644 --- a/convex/agentcoreSetup.ts +++ b/convex/agentcoreSetup.ts @@ -11,30 +11,30 @@ import { getAuthUserId } from "@convex-dev/auth/server"; /** * Setup AgentCore MCP server configuration */ -export const setupAgentCoreMCP = action({ +export const setupAgentCoreMCP = action( { args: {}, - handler: async (ctx): Promise<{ success: boolean; message: string; serverId?: any; error?: string }> => { - const userId = await getAuthUserId(ctx); - if (!userId) { - throw new Error("Not authenticated"); + handler: async ( ctx ): Promise<{ success: boolean; message: string; serverId?: any; error?: string }> => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + throw new Error( "Not authenticated" ); } try { // Check if AgentCore MCP server already exists - const existing: any = await ctx.runQuery(api.mcpConfig.getMCPServerByName, { + const existing: any = await ctx.runQuery( api.mcpConfig.getMCPServerByName, { serverName: "bedrock-agentcore-mcp-server", - }); + } ); - if (existing) { - return { - success: true, + if ( existing ) { + return { + success: true, message: "AgentCore MCP server already configured", - serverId: existing._id + serverId: existing._id }; } // Create AgentCore MCP server based on your Windows config - const serverId: any = await ctx.runMutation(api.mcpConfig.addMCPServer, { + const serverId: any = await ctx.runMutation( api.mcpConfig.addMCPServer, { name: "bedrock-agentcore-mcp-server", command: "uv", args: [ @@ -50,93 +50,93 @@ export const setupAgentCoreMCP = action({ }, disabled: false, timeout: 60000, // 60 seconds - }); + } ); - return { - success: true, + return { + success: true, message: "AgentCore MCP server configured successfully", - serverId + serverId }; - } catch (error: any) { - return { - success: false, + } catch ( error: any ) { + return { + success: false, message: "Setup failed", - error: error.message || "Failed to setup AgentCore MCP server" + error: error.message || "Failed to setup AgentCore MCP server" }; } }, -}); +} ); /** * Test AgentCore connection */ -export const testAgentCoreConnection = action({ +export const testAgentCoreConnection = action( { args: {}, - handler: async (ctx): Promise<{ success: boolean; status?: string; error?: string; tools?: any[] }> => { - const userId = await getAuthUserId(ctx); - if (!userId) { - throw new Error("Not authenticated"); + handler: async ( ctx ): Promise<{ success: boolean; status?: string; error?: string; tools?: any[] }> => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + throw new Error( "Not authenticated" ); } try { - const result: any = await ctx.runAction(api.mcpClient.testMCPServerConnection, { + const result: any = await ctx.runAction( api.mcpClient.testMCPServerConnection, { serverName: "bedrock-agentcore-mcp-server", - }); + } ); return result; - } catch (error: any) { + } catch ( error: any ) { return { success: false, error: error.message || "Failed to test AgentCore connection", }; } }, -}); +} ); /** * Execute agent test */ -export const executeAgentTest = action({ +export const executeAgentTest = action( { args: { - agentId: v.id("agents"), + agentId: v.id( "agents" ), input: v.string(), chatType: v.string(), // "agent_builder" | "test_chat" | "chat_ui" }, - handler: async (ctx, args): Promise<{ success: boolean; response?: string; error?: string; testId?: any; executionTime?: number }> => { - const userId = await getAuthUserId(ctx); - if (!userId) { - throw new Error("Not authenticated"); + handler: async ( ctx, args ): Promise<{ success: boolean; response?: string; error?: string; testId?: any; executionTime?: number }> => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + throw new Error( "Not authenticated" ); } try { // Get agent - const agent: any = await ctx.runQuery(api.agents.get, { id: args.agentId }); - if (!agent) { - throw new Error("Agent not found"); + const agent: any = await ctx.runQuery( api.agents.get, { id: args.agentId } ); + if ( !agent ) { + throw new Error( "Agent not found" ); } // Check ownership - if (agent.createdBy !== userId) { - throw new Error("Not authorized to test this agent"); + if ( agent.createdBy !== userId ) { + throw new Error( "Not authorized to test this agent" ); } // Simple rate limiting check const user: any = { tier: "freemium" }; // Simplified for now const tier: string = user?.tier || "freemium"; - + // Get monthly test count - simplified for now const monthlyTests: any = 0; // Simplified for now - // Rate limits by tier - const limits = { - freemium: 50, - personal: 500, - enterprise: -1, // unlimited - }; - const limit = limits[tier as keyof typeof limits] || 50; - - if (limit !== -1 && monthlyTests >= limit) { + // Rate limits from centralized tier config + const { getTierConfig } = await import( "./lib/tierConfig" ); + const tierConfig = getTierConfig( tier ); + if ( !tierConfig ) { + throw new Error( "User tier configuration not found" ); + } + const limit = tierConfig.monthlyExecutions; // -1 = unlimited + + if ( limit !== -1 && monthlyTests >= limit ) { return { success: false, error: `Monthly test limit reached (${monthlyTests}/${limit}). Upgrade for more tests.`, @@ -144,14 +144,14 @@ export const executeAgentTest = action({ } // Create test execution - const testId: any = await ctx.runMutation(api.testExecution.submitTest, { + const testId: any = await ctx.runMutation( api.testExecution.submitTest, { agentId: args.agentId, testQuery: args.input, timeout: 60000, - }); + } ); - // Execute via AgentCore MCP (using existing mcpClient) - const result: any = await ctx.runAction(api.mcpClient.invokeMCPTool, { + // Execute via AgentCore MCP + const result: any = await ctx.runAction( api.mcpClient.invokeMCPTool, { serverName: "bedrock-agentcore-mcp-server", toolName: "execute_agent", parameters: { @@ -161,10 +161,10 @@ export const executeAgentTest = action({ system_prompt: agent.systemPrompt, }, timeout: 60000, - }); + } ); // Update test execution using internal mutation - if (result.success) { + if ( result.success ) { return { success: true, response: result.result?.response, @@ -178,60 +178,60 @@ export const executeAgentTest = action({ testId, }; } - } catch (error: any) { + } catch ( error: any ) { return { success: false, error: error.message || "Agent execution failed", }; } }, -}); +} ); /** * Generate requirements.txt for agent */ -function generateRequirements(agent: any): string { +function generateRequirements( agent: any ): string { const baseRequirements = [ "bedrock-agentcore-starter-toolkit>=1.0.0", "strands-agents>=1.0.0", ]; const toolRequirements: string[] = []; - if (agent.tools) { - for (const tool of agent.tools) { - if (tool.pipPackages) { - toolRequirements.push(...tool.pipPackages); + if ( agent.tools ) { + for ( const tool of agent.tools ) { + if ( tool.pipPackages ) { + toolRequirements.push( ...tool.pipPackages ); } - if (tool.extrasPip) { - toolRequirements.push(tool.extrasPip); + if ( tool.extrasPip ) { + toolRequirements.push( tool.extrasPip ); } } } // Convert strands-tools to strands-agents-tools as per AgentCore docs - const finalRequirements = [...baseRequirements, ...toolRequirements].map(req => - req.includes("strands-tools") ? req.replace("strands-tools", "strands-agents-tools") : req + const finalRequirements = [...baseRequirements, ...toolRequirements].map( req => + req.includes( "strands-tools" ) ? req.replace( "strands-tools", "strands-agents-tools" ) : req ); - return Array.from(new Set(finalRequirements)).join("\n"); + return Array.from( new Set( finalRequirements ) ).join( "\n" ); } /** * Check AgentCore status and user limits */ -export const getAgentCoreStatus = action({ +export const getAgentCoreStatus = action( { args: {}, - handler: async (ctx): Promise<{ success: boolean; error?: string; agentCoreConnected?: boolean; agentCoreStatus?: string; userTier?: string; monthlyTests?: number; testLimit?: number; remainingTests?: number | string; rateLimitStatus?: string }> => { - const userId = await getAuthUserId(ctx); - if (!userId) { - throw new Error("Not authenticated"); + handler: async ( ctx ): Promise<{ success: boolean; error?: string; agentCoreConnected?: boolean; agentCoreStatus?: string; userTier?: string; monthlyTests?: number; testLimit?: number; remainingTests?: number | string; rateLimitStatus?: string }> => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + throw new Error( "Not authenticated" ); } try { // Test MCP server connection - const connectionTest: any = await ctx.runAction(api.mcpClient.testMCPServerConnection, { + const connectionTest: any = await ctx.runAction( api.mcpClient.testMCPServerConnection, { serverName: "bedrock-agentcore-mcp-server", - }); + } ); // Get user stats - simplified for now const user: any = { tier: "freemium" }; // Simplified for now @@ -239,13 +239,9 @@ export const getAgentCoreStatus = action({ const monthlyTests: any = 0; // Simplified for now - const limits = { - freemium: 50, - personal: 500, - enterprise: -1, - }; - - const limit = limits[tier as keyof typeof limits] || 50; + const { getTierConfig: getTierCfg } = await import( "./lib/tierConfig" ); + const tierCfg = getTierCfg( tier ); + const limit = tierCfg.monthlyExecutions; // -1 = unlimited return { success: true, @@ -254,14 +250,14 @@ export const getAgentCoreStatus = action({ userTier: tier, monthlyTests, testLimit: limit, - remainingTests: limit === -1 ? "unlimited" : Math.max(0, limit - monthlyTests), + remainingTests: limit === -1 ? "unlimited" : Math.max( 0, limit - monthlyTests ), rateLimitStatus: limit === -1 || monthlyTests < limit ? "ok" : "exceeded", }; - } catch (error: any) { + } catch ( error: any ) { return { success: false, error: error.message || "Failed to get AgentCore status", }; } }, -}); \ No newline at end of file +} ); \ No newline at end of file diff --git a/convex/agentcoreTestExecution.ts b/convex/agentcoreTestExecution.ts index 3c3035c..937a125 100644 --- a/convex/agentcoreTestExecution.ts +++ b/convex/agentcoreTestExecution.ts @@ -1,245 +1,454 @@ "use node"; /** - * AgentCore Test Execution - * Executes agent tests in Bedrock AgentCore sandbox (for Bedrock models) + * AgentCore Test Execution - Cost Optimized + * + * Executes agent tests using the cheapest possible method: + * 1. Direct Bedrock API (cheapest) + * 2. Lambda with @app.entrypoint (backup) + * 3. No MCP server complexity */ import { internalAction } from "./_generated/server"; import { v } from "convex/values"; -import { api, internal } from "./_generated/api"; +import { internal } from "./_generated/api"; /** - * Execute agent test in AgentCore sandbox - * Called by queueProcessor for Bedrock models + * Execute agent test with cost-optimized approach + * Primary: Direct Bedrock API (cheapest) + * Backup: Lambda with @app.entrypoint (reliable) + * No MCP server complexity */ -export const executeAgentCoreTest = internalAction({ +export const executeAgentCoreTest = internalAction( { args: { - testId: v.id("testExecutions"), - agentId: v.id("agents"), + testId: v.id( "testExecutions" ), + agentId: v.id( "agents" ), input: v.string(), - conversationHistory: v.optional(v.array(v.any())), + conversationHistory: v.optional( v.array( v.any() ) ), }, - handler: async (ctx, args): Promise<{ success: boolean; response?: string; error?: string; executionTime?: number }> => { + handler: async ( ctx, args ) => { const startTime = Date.now(); try { - // Get agent - const agent: any = await ctx.runQuery(internal.agents.getInternal, { id: args.agentId }); - if (!agent) { - throw new Error("Agent not found"); + // Resolve test + agent first so all limits and usage tracking are keyed to the test submitter. + const [testDetails, agent] = await Promise.all( [ + ctx.runQuery( internal.testExecution.getTestByIdInternal, { testId: args.testId } ), + ctx.runQuery( internal.agents.getInternal, { id: args.agentId } ), + ] ); + + if ( !testDetails ) { + return { success: false, error: "Test not found" }; } - // Update test status to running - await ctx.runMutation(internal.testExecution.updateStatus, { + // Update status to running + await ctx.runMutation( internal.testExecution.updateStatus, { testId: args.testId, status: "RUNNING", - }); - - // Try Lambda first (runs actual agent code with @app.entrypoint) - let result = await executeViaLambda({ - agentCode: agent.generatedCode, - input: args.input, - modelId: agent.model, - tools: agent.tools || [], - }); - - if (!result.success) { - const isTimeout = result.error?.includes("timeout"); - const is4xxOr5xx = result.error?.includes("4") || result.error?.includes("5"); - - if (isTimeout) { - // 200 but no response - wait 2s and retry Lambda once - await new Promise(resolve => setTimeout(resolve, 2000)); - result = await executeViaLambda({ + } ); + + // CHECK LIMITS: keyed to test owner, not agent creator. + if ( !agent ) { + await ctx.runMutation( internal.testExecution.updateStatus, { + testId: args.testId, + status: "FAILED", + success: false, + error: "Agent not found", + } ); + return { success: false, error: "Agent not found" }; + } + + const executionUserId = testDetails.userId; + const user = await ctx.runQuery( internal.users.getInternal, { id: executionUserId } ); + const tier = user?.tier || "freemium"; + + const { getTierConfig, isProviderAllowedForTier } = await import( "./lib/tierConfig" ); + const tierCfg = getTierConfig( tier ); + + // Burst rate limit: enforce tier-aware per-minute ceiling + const { checkRateLimit, buildTierRateLimitConfig } = await import( "./rateLimiter" ); + const rlCfg = buildTierRateLimitConfig( tierCfg.maxConcurrentTests, "agentTesting" ); + const rlResult = await checkRateLimit( ctx, String( executionUserId ), "agentTesting", rlCfg ); + if ( !rlResult.allowed ) { + await ctx.runMutation( internal.testExecution.updateStatus, { + testId: args.testId, + status: "FAILED", + success: false, + error: rlResult.reason || "Rate limit exceeded. Please wait before submitting more tests.", + } ); + return { success: false, error: "Burst rate limit exceeded" }; + } + + // PROVIDER TIER GATE: Enforce per-tier allowed provider rules + // (mirrors strandsAgentExecution.ts, strandsAgentExecutionDynamic.ts, + // and testExecution.ts logic). + const isOllama = agent.modelProvider === "ollama"; + if ( !isOllama && !isProviderAllowedForTier( tier, "bedrock" ) ) { + await ctx.runMutation( internal.testExecution.updateStatus, { + testId: args.testId, + status: "FAILED", + success: false, + error: `${tierCfg.displayName} tier does not allow Bedrock models. ` + + `Allowed providers: ${tierCfg.allowedProviders.join( ", " )}. ` + + `Use Ollama models for free, or upgrade your subscription.`, + } ); + return { success: false, error: "Provider not allowed for tier" }; + } + + // Route based on model provider + let result; + let executionMethod = "bedrock"; + + // Check if agent uses Ollama + if ( isOllama ) { + result = await executeViaOllama( { + input: args.input, + modelId: agent.model, + systemPrompt: agent.systemPrompt, + ollamaEndpoint: agent.ollamaEndpoint || "http://localhost:11434", + conversationHistory: args.conversationHistory, + } ); + executionMethod = "ollama"; + } else { + // PRIMARY: Direct Bedrock API (cheapest) + result = await executeViaDirectBedrock( { + input: args.input, + modelId: agent.model, + systemPrompt: agent.systemPrompt, + conversationHistory: args.conversationHistory, + } ); + + // BACKUP: Lambda with @app.entrypoint if Bedrock fails or SDK misbehaves + if ( !result.success ) { + if ( shouldFallbackToLambda( result.error ) ) { + console.warn( + `Bedrock SDK failure detected (${result.error}). Falling back to Lambda for test ${args.testId}` + ); + } else { + console.log( `Bedrock failed, trying Lambda backup for test ${args.testId}` ); + } + + result = await executeViaLambda( { agentCode: agent.generatedCode, input: args.input, modelId: agent.model, tools: agent.tools || [], - }); - - // If still timeout, fail - if (!result.success) { - return { success: false, error: result.error, executionTime: Date.now() - startTime }; - } - } else if (is4xxOr5xx) { - // 4xx/5xx - try Bedrock fallback once - result = await executeViaDirectBedrock({ - input: args.input, - modelId: agent.model, - systemPrompt: agent.systemPrompt, - }); + } ); + + executionMethod = result.success ? "lambda" : "failed"; } } const executionTime = Date.now() - startTime; - if (!result.success) { - await ctx.runMutation(internal.testExecution.updateStatus, { + if ( result.success ) { + // TRACK USAGE: Only on successful completion + await ctx.runMutation( internal.testExecution.incrementUserUsage, { + userId: executionUserId, + testId: args.testId, + usage: result.result?.usage, + executionTime, + executionMethod, + modelId: agent.model, + } ); + + // Update test with success + await ctx.runMutation( internal.testExecution.updateStatus, { + testId: args.testId, + status: "COMPLETED", + success: true, + response: result.result?.response, + } ); + + return { + success: true, + response: result.result?.response, + executionTime, + executionMethod, + }; + } else { + // Update test with failure (no usage tracking for failures) + await ctx.runMutation( internal.testExecution.updateStatus, { testId: args.testId, status: "FAILED", success: false, - error: result.error || "AgentCore execution failed", - }); - return { success: false, error: result.error, executionTime }; + error: result.error, + } ); + + return { + success: false, + error: result.error, + executionTime, + }; } + } catch ( error: any ) { + const executionTime = Date.now() - startTime; - // Update test with success - await ctx.runMutation(internal.testExecution.updateStatus, { + console.error( "executeAgentCoreTest failed:", error ); + await ctx.runMutation( internal.testExecution.updateStatus, { testId: args.testId, - status: "COMPLETED", - success: true, - response: (result as any).result?.response || "No response", - }); + status: "FAILED", + success: false, + error: error.message, + } ); return { - success: true, - response: (result as any).result?.response, - executionTime, - }; - } catch (error: any) { - await ctx.runMutation(internal.testExecution.updateStatus, { - testId: args.testId, - status: "FAILED", success: false, error: error.message, - }); - return { success: false, error: error.message, executionTime: Date.now() - startTime }; + executionTime, + }; } }, -}); - +} ); /** - * Execute via Lambda (runs actual agent code with @app.entrypoint) + * Execute via direct Bedrock API (cheapest option) + * SUPPORTS ALL BEDROCK MODELS: Claude, Nova, Titan, Llama, Mistral, etc. */ -async function executeViaLambda(params: { - agentCode: string; +async function executeViaDirectBedrock( params: { input: string; modelId: string; - tools: any[]; -}): Promise<{ success: boolean; result?: any; error?: string }> { + systemPrompt: string; + conversationHistory?: any[]; +} ): Promise<{ success: boolean; result?: any; error?: string }> { try { - const { LambdaClient, InvokeCommand } = await import("@aws-sdk/client-lambda"); - - const client = new LambdaClient({ + const { BedrockRuntimeClient, ConverseCommand } = await import( "@aws-sdk/client-bedrock-runtime" ); + + const client = new BedrockRuntimeClient( { region: process.env.AWS_REGION || "us-east-1", - credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, - }, - }); - - const command = new InvokeCommand({ - FunctionName: process.env.AGENT_TEST_LAMBDA_FUNCTION || "agent-builder-test-runner", - InvocationType: "RequestResponse", - Payload: JSON.stringify({ - agentCode: params.agentCode, - input: params.input, - modelId: params.modelId, - tools: params.tools, - }), - }); - - // Set 30s timeout for Lambda response - const timeoutPromise = new Promise((_, reject) => - setTimeout(() => reject(new Error("timeout")), 30000) - ); - - const response = await Promise.race([ - client.send(command), - timeoutPromise - ]); - const statusCode = response.StatusCode || 500; - - // Lambda execution failed (agent code error) - if (response.FunctionError) { - const errorPayload = JSON.parse(new TextDecoder().decode(response.Payload)); - return { - success: false, - error: `Lambda 400: ${errorPayload.errorMessage || "agent code failed"}`, - }; - } - - // Lambda invocation failed (service error) - if (statusCode !== 200 && statusCode !== 202) { - return { - success: false, - error: `Lambda ${statusCode}: invocation failed`, - }; + ...( process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY && { + credentials: { + accessKeyId: process.env.AWS_ACCESS_KEY_ID, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY, + }, + } ), + } ); + + // Resolve model ID using authoritative shared registry + const { resolveBedrockModelId } = await import( "./modelRegistry.js" ); + const modelId = resolveBedrockModelId( params.modelId ); + + // Build conversation using Converse API (works with ALL Bedrock models) + const messages: any[] = []; + + // Add conversation history if provided (last 5 messages for context) + if ( params.conversationHistory ) { + for ( const msg of params.conversationHistory.slice( -5 ) ) { + if ( msg.role === "user" || msg.role === "assistant" ) { + messages.push( { + role: msg.role, + content: [{ text: msg.content }], + } ); + } + } } - - const result = JSON.parse(new TextDecoder().decode(response.Payload)); + + // Add current input + messages.push( { + role: "user", + content: [{ text: params.input }], + } ); + + // Use Converse API - works with ALL Bedrock models + const command = new ConverseCommand( { + modelId: modelId, + messages: messages, + system: [{ text: params.systemPrompt }], + inferenceConfig: { + maxTokens: 4096, + temperature: 0.7, + }, + } ); + + // Execute with timeout + const response = await Promise.race( [ + client.send( command ), + new Promise( ( _, reject ) => + setTimeout( () => reject( new Error( "Bedrock timeout" ) ), 60000 ) + ), + ] ); + + // Extract response (Converse API format) + const responseText = response.output?.message?.content?.[0]?.text || JSON.stringify( response.output ); + return { success: true, - result: { response: result.response || result.body }, + result: { + response: responseText, + usage: response.usage || {}, + modelId: modelId, + }, + }; + } catch ( error: any ) { + console.error( "Bedrock direct invoke failed:", error ); + const message = error instanceof Error ? error.message : String( error ); + return { + success: false, + error: `Bedrock API failed: ${message}`, }; - } catch (error: any) { - if (error.message === "timeout") { - return { success: false, error: `Lambda timeout: no response after 30s` }; + } +} + +function shouldFallbackToLambda( errorMessage?: string ) { + if ( !errorMessage ) { + return false; + } + return /is not a constructor/i.test( errorMessage ) || /bedrock runtime failed/i.test( errorMessage ); +} + +/** + * Execute via Ollama (local model) + * Uses OpenAI-compatible API + */ +async function executeViaOllama( params: { + input: string; + modelId: string; + systemPrompt: string; + ollamaEndpoint: string; + conversationHistory?: any[]; +} ): Promise<{ success: boolean; result?: any; error?: string }> { + try { + // Build messages array + const messages: any[] = []; + + // Add conversation history if provided (last 5 messages for context) + if ( params.conversationHistory ) { + const validRoles = new Set( ["user", "assistant", "system", "tool"] ); + for ( const msg of params.conversationHistory.slice( -5 ) ) { + if ( validRoles.has( msg.role ) ) { + messages.push( { + role: msg.role, + content: msg.content, + } ); + } + } } - if (error.code === "ResourceNotFoundException") { - return { success: false, error: `Lambda 404: function not found` }; + + // Add system message and current input + messages.unshift( { + role: "system", + content: params.systemPrompt, + } ); + + messages.push( { + role: "user", + content: params.input, + } ); + + // Validate Ollama endpoint to prevent SSRF + const allowedHosts = ["localhost", "127.0.0.1", "::1"]; + try { + const endpointUrl = new URL( params.ollamaEndpoint ); + if ( !allowedHosts.includes( endpointUrl.hostname ) ) { + throw new Error( `Ollama endpoint host '${endpointUrl.hostname}' is not allowed. Only localhost connections are permitted.` ); + } + } catch ( e: any ) { + if ( e.message.includes( "not allowed" ) ) throw e; + throw new Error( `Invalid Ollama endpoint URL: ${params.ollamaEndpoint}` ); + } + + // Call Ollama's OpenAI-compatible endpoint with timeout + const controller = new AbortController(); + const timeoutId = setTimeout( () => controller.abort(), 30000 ); + let response: Response; + try { + response = await fetch( `${params.ollamaEndpoint}/v1/chat/completions`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify( { + model: params.modelId, + messages: messages, + temperature: 0.7, + max_tokens: 4096, + } ), + signal: controller.signal, + } ); + } finally { + clearTimeout( timeoutId ); } - if (error.code === "AccessDeniedException") { - return { success: false, error: `Lambda 403: access denied` }; + + if ( !response.ok ) { + const errorText = await response.text(); + throw new Error( `Ollama API failed: ${response.status} ${errorText}` ); } - return { success: false, error: `Lambda 500: ${error.message}` }; + + const data = await response.json(); + + const responseText = data.choices?.[0]?.message?.content || ""; + + return { + success: true, + result: { + response: responseText, + usage: data.usage || {}, + modelId: params.modelId, + }, + }; + } catch ( error: any ) { + return { + success: false, + error: `Ollama execution failed: ${error.message}`, + }; } } /** - * Fallback: Execute via direct Bedrock API (doesn't run agent code, just model) + * Execute via Lambda with @app.entrypoint (backup option) */ -async function executeViaDirectBedrock(params: { +async function executeViaLambda( params: { + agentCode: string; input: string; modelId: string; - systemPrompt: string; -}): Promise<{ success: boolean; result?: any; error?: string }> { + tools: any[]; +} ): Promise<{ success: boolean; result?: any; error?: string }> { try { - const { BedrockRuntimeClient, InvokeModelCommand } = await import("@aws-sdk/client-bedrock-runtime"); - - const client = new BedrockRuntimeClient({ + const { LambdaClient, InvokeCommand } = await import( "@aws-sdk/client-lambda" ); + + const client = new LambdaClient( { region: process.env.AWS_REGION || "us-east-1", credentials: { accessKeyId: process.env.AWS_ACCESS_KEY_ID!, secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, }, - }); - - let modelId = params.modelId; - if (!modelId.includes(":") && !modelId.startsWith("us.") && !modelId.startsWith("anthropic.")) { - const modelMap: Record = { - "claude-3-5-sonnet-20241022": "us.anthropic.claude-3-5-sonnet-20241022-v2:0", - "claude-3-5-haiku-20241022": "us.anthropic.claude-3-5-haiku-20241022-v1:0", + } ); + + const command = new InvokeCommand( { + FunctionName: process.env.AGENT_TEST_LAMBDA_FUNCTION || "agent-builder-test-runner", + InvocationType: "RequestResponse", + Payload: JSON.stringify( { + agentCode: params.agentCode, // Contains @app.entrypoint + input: params.input, + modelId: params.modelId, + tools: params.tools, + } ), + } ); + + // Execute with timeout + const response = await Promise.race( [ + client.send( command ), + new Promise( ( _, reject ) => + setTimeout( () => reject( new Error( "Lambda timeout" ) ), 30000 ) + ) + ] ); + + if ( response.FunctionError ) { + const rawPayload = response.Payload ? new TextDecoder().decode( response.Payload ) : "{}"; + const errorPayload = JSON.parse( rawPayload ); return { + success: false, + error: `Agent execution failed: ${errorPayload.errorMessage || "unknown error"}`, }; - modelId = modelMap[params.modelId] || "us.anthropic.claude-3-5-haiku-20241022-v1:0"; } - - const command = new InvokeModelCommand({ - modelId, - contentType: "application/json", - accept: "application/json", - body: JSON.stringify({ - anthropic_version: "bedrock-2023-05-31", - max_tokens: 4096, - system: params.systemPrompt, - messages: [{ role: "user", content: [{ type: "text", text: params.input }] }], - }), - }); - - const response = await client.send(command); - const body = JSON.parse(new TextDecoder().decode(response.body)); - const content = body.content?.find((c: any) => c.type === "text")?.text || ""; - + + const result = JSON.parse( new TextDecoder().decode( response.Payload ) ); return { success: true, - result: { response: content }, + result: { response: result.response || result.body }, }; - } catch (error: any) { + } catch ( error: any ) { return { success: false, - error: `Bedrock API failed: ${error.message}`, + error: error.message, }; } } diff --git a/convex/agents.ts b/convex/agents.ts index 4f73926..5dd5d1a 100644 --- a/convex/agents.ts +++ b/convex/agents.ts @@ -61,6 +61,7 @@ export const create = mutation({ name: v.string(), description: v.optional(v.string()), model: v.string(), + modelProvider: v.optional(v.string()), systemPrompt: v.string(), tools: v.array(v.object({ name: v.string(), @@ -85,6 +86,7 @@ export const create = mutation({ env: v.optional(v.any()), disabled: v.optional(v.boolean()), }))), + sourceWorkflowId: v.optional(v.id("workflows")), }, handler: async (ctx, args) => { const userId = await getAuthUserId(ctx); @@ -95,6 +97,7 @@ export const create = mutation({ return await ctx.db.insert("agents", { ...args, createdBy: userId, + modelProvider: args.modelProvider, }); }, }); @@ -129,6 +132,8 @@ export const update = mutation({ env: v.optional(v.any()), disabled: v.optional(v.boolean()), }))), + modelProvider: v.optional(v.string()), + sourceWorkflowId: v.optional(v.id("workflows")), }, handler: async (ctx, args) => { const userId = await getAuthUserId(ctx); diff --git a/convex/apiKeys.ts b/convex/apiKeys.ts index 6963cdd..c1f16f3 100644 --- a/convex/apiKeys.ts +++ b/convex/apiKeys.ts @@ -7,6 +7,7 @@ import { mutation, query, action, internalQuery } from "./_generated/server"; import { v } from "convex/values"; import { getAuthUserId } from "@convex-dev/auth/server"; import { api, internal } from "./_generated/api"; +import { getTierConfig } from "./lib/tierConfig"; /** * Generate API key for user @@ -241,35 +242,15 @@ export const incrementUsage = mutation({ }); /** - * Get tier limits + * Get tier limits - delegates to centralized tierConfig. */ function getTierLimits(tier: string) { - switch (tier) { - case "freemium": - return { - testsPerMonth: 10, - maxConcurrentTests: 1, - maxAgents: 5, - }; - case "personal": - return { - testsPerMonth: 1000, - maxConcurrentTests: 5, - maxAgents: 50, - }; - case "enterprise": - return { - testsPerMonth: 10000, - maxConcurrentTests: 20, - maxAgents: 500, - }; - default: - return { - testsPerMonth: 10, - maxConcurrentTests: 1, - maxAgents: 5, - }; - } + const config = getTierConfig(tier); + return { + testsPerMonth: config.monthlyExecutions === -1 ? Infinity : config.monthlyExecutions, + maxConcurrentTests: config.maxConcurrentTests, + maxAgents: config.maxAgents, + }; } /** diff --git a/convex/auditLogs.ts b/convex/auditLogs.ts new file mode 100644 index 0000000..eab7348 --- /dev/null +++ b/convex/auditLogs.ts @@ -0,0 +1,109 @@ +/** + * Audit Logs API + * Track all significant events for compliance and debugging + */ + +import { internalMutation, query } from "./_generated/server"; +import { v } from "convex/values"; +import { getAuthUserId } from "@convex-dev/auth/server"; +import { UserRole } from "./users"; +/** + * Log an event to the audit log + * Internal-only: not callable from clients to prevent log spoofing + */ +export const logEvent = internalMutation( { + args: { + eventType: v.string(), + userId: v.optional( v.id( "users" ) ), + action: v.string(), + resource: v.optional( v.string() ), + resourceId: v.optional( v.string() ), + success: v.boolean(), + details: v.optional( v.any() ), + metadata: v.optional( v.object( { + provider: v.optional( v.string() ), + serverName: v.optional( v.string() ), + toolName: v.optional( v.string() ), + agentId: v.optional( v.string() ), + ipAddress: v.optional( v.string() ), + userAgent: v.optional( v.string() ), + } ) ), + }, + handler: async ( ctx, args ) => { + return await ctx.db.insert( "auditLogs", { + eventType: args.eventType, + userId: args.userId, + action: args.action, + resource: args.resource, + resourceId: args.resourceId, + success: args.success, + details: args.details, + metadata: args.metadata, + timestamp: Date.now(), + } ); + }, +} ); + +/** + * Get audit logs for current user + */ +export const getUserLogs = query( { + args: { + limit: v.optional( v.number() ), + }, + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + return []; + } + + const logs = await ctx.db + .query( "auditLogs" ) + .withIndex( "by_user", ( q ) => q.eq( "userId", userId ) ) + .order( "desc" ) + .take( args.limit || 50 ); + + return logs; + }, +} ); + +/** + * Get all audit logs (admin only) + */ +export const getAllLogs = query( { + args: { + limit: v.optional( v.number() ), + eventType: v.optional( v.string() ), + }, + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + return []; + } + + // Enforce admin-only access before returning any audit logs. + const user = await ctx.db.get( userId ); + if ( !user || ( user as any ).role !== UserRole.ADMIN ) { + // Non-admins are not allowed to view global audit logs. + return []; + } + + if ( args.eventType ) { + const logs = await ctx.db + .query( "auditLogs" ) + .withIndex( "by_event_type", ( q ) => q.eq( "eventType", args.eventType! ) ) + .order( "desc" ) + .take( args.limit || 100 ); + + return logs; + } + + const logs = await ctx.db + .query( "auditLogs" ) + .withIndex( "by_timestamp" ) + .order( "desc" ) + .take( args.limit || 100 ); + + return logs; + }, +} ); diff --git a/convex/auth.ts b/convex/auth.ts index fa40e3c..10f44a5 100644 --- a/convex/auth.ts +++ b/convex/auth.ts @@ -1,5 +1,4 @@ import GitHub from "@auth/core/providers/github"; -import Google from "@auth/core/providers/google"; import { Password } from "@convex-dev/auth/providers/Password"; import { Anonymous } from "@convex-dev/auth/providers/Anonymous"; import { convexAuth, getAuthUserId } from "@convex-dev/auth/server"; @@ -7,18 +6,18 @@ import { query } from "./_generated/server"; import CognitoProvider from '@auth/core/providers/cognito'; // Build providers array with all authentication methods +// Google OAuth removed - was never approved const providers: any[] = [ Anonymous, // Continue as guest - basic setup, no customization needed Password, // Email/password authentication GitHub, // GitHub OAuth - Google, // Google OAuth ]; // AWS Cognito OAuth - OIDC provider for AWS Federated Identity // When users sign in with Cognito, they can exchange their ID token for AWS credentials // This enables deployment to their own AWS accounts -if (process.env.COGNITO_ISSUER_URL && process.env.COGNITO_CLIENT_ID && process.env.COGNITO_CLIENT_SECRET) { - const CognitoConfig = CognitoProvider({ +if ( process.env.COGNITO_ISSUER_URL && process.env.COGNITO_CLIENT_ID && process.env.COGNITO_CLIENT_SECRET ) { + const CognitoConfig = CognitoProvider( { id: "cognito", name: "AWS Cognito", issuer: process.env.COGNITO_ISSUER_URL, @@ -29,7 +28,7 @@ if (process.env.COGNITO_ISSUER_URL && process.env.COGNITO_CLIENT_ID && process.e scope: "openid email profile aws.cognito.signin.user.admin", }, }, - profile(profile: any) { + profile( profile: any ) { return { id: profile.sub, name: profile.name ?? profile.email, @@ -38,21 +37,21 @@ if (process.env.COGNITO_ISSUER_URL && process.env.COGNITO_CLIENT_ID && process.e cognitoUsername: profile["cognito:username"], }; }, - }); - providers.push(CognitoConfig); + } ); + providers.push( CognitoConfig ); } -export const { auth, signIn, signOut, store, isAuthenticated } = convexAuth({ +export const { auth, signIn, signOut, store, isAuthenticated } = convexAuth( { providers, -}); +} ); -export const loggedInUser = query({ - handler: async (ctx) => { - const userId = await getAuthUserId(ctx); - if (!userId) { +export const loggedInUser = query( { + handler: async ( ctx ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { return null; } - const user = await ctx.db.get(userId); + const user = await ctx.db.get( userId ); return user ?? null; }, -}); \ No newline at end of file +} ); \ No newline at end of file diff --git a/convex/authDebug.ts b/convex/authDebug.ts index bc371b3..2a65cc5 100644 --- a/convex/authDebug.ts +++ b/convex/authDebug.ts @@ -12,15 +12,6 @@ export const getOAuthConfig = query({ clientSecret: process.env.AUTH_GITHUB_SECRET ? "✓ Set" : "✗ Missing", }, }, - { - id: "google", - name: "Google", - configured: !!(process.env.AUTH_GOOGLE_ID && process.env.AUTH_GOOGLE_SECRET), - envVars: { - clientId: process.env.AUTH_GOOGLE_ID ? "✓ Set" : "✗ Missing", - clientSecret: process.env.AUTH_GOOGLE_SECRET ? "✓ Set" : "✗ Missing", - }, - }, { id: "cognito", name: "AWS Cognito", @@ -41,7 +32,7 @@ export const getOAuthConfig = query({ const deploymentUrls = [ { name: "Local Development", - url: "http://localhost:3000", + url: "http://localhost:4000", description: "For local testing during development", }, { diff --git a/convex/automatedAgentBuilder.ts b/convex/automatedAgentBuilder.ts new file mode 100644 index 0000000..3e24a8c --- /dev/null +++ b/convex/automatedAgentBuilder.ts @@ -0,0 +1,503 @@ +/** + * Automated Agent Builder with Woz-Style Questions + * + * Sequential conversational flow: + * 1. User describes what they want + * 2. AI uses THINK tool to analyze + * 3. AI asks first question with suggestions + * 4. User answers + * 5. AI uses THINK tool to refine understanding + * 6. AI asks next question OR generates agent if ready + * + * Uses strands-agents framework with interleaved reasoning + */ + +import { mutation, action, query, internalMutation, internalQuery } from "./_generated/server"; +import { v } from "convex/values"; +import { api, internal } from "./_generated/api"; +import { getAuthUserId } from "@convex-dev/auth/server"; + +/** + * Create a new agent building session + */ +export const createBuildSession = mutation( { + args: { + initialDescription: v.optional( v.string() ), + }, + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + + if ( !userId ) { + throw new Error( "Authentication required to build agents" ); + } + + // Initialize session with agent requirements + const sessionId = await ctx.db.insert( "agentBuildSessions", { + userId, + status: "gathering_requirements", + currentQuestion: 0, + agentRequirements: { + agentType: null, + targetUsers: null, + problems: [], + tools: [], + tone: null, + testingPreference: null, + domainKnowledge: null, + knowledgeBase: null, + documentUrls: [], + }, + conversationHistory: args.initialDescription + ? [ + { + role: "user", + content: args.initialDescription, + timestamp: Date.now(), + }, + ] + : [], + createdAt: Date.now(), + updatedAt: Date.now(), + } ); + + return { sessionId }; + }, +} ); + +/** + * Get build session + */ +export const getBuildSession = query( { + args: { + sessionId: v.id( "agentBuildSessions" ), + }, + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) return null; + + const session = await ctx.db.get( args.sessionId ); + if ( !session || session.userId !== userId ) { + return null; + } + + return session; + }, +} ); + +/** + * Get user's build sessions + */ +export const getUserBuildSessions = query( { + args: { + limit: v.optional( v.number() ), + }, + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) return []; + + return await ctx.db + .query( "agentBuildSessions" ) + .withIndex( "by_user", ( q ) => q.eq( "userId", userId ) ) + .order( "desc" ) + .take( args.limit || 10 ); + }, +} ); + +/** + * Process user response and ask next question + * Uses strands-agents THINK tool for interleaved reasoning + */ +export const processResponse = action( { + args: { + sessionId: v.id( "agentBuildSessions" ), + userResponse: v.string(), + }, + handler: async ( ctx, args ) => { + // Verify caller owns this session + const identity = await ctx.auth.getUserIdentity(); + if ( !identity ) { + throw new Error( "Authentication required" ); + } + + // Get session + const session = await ctx.runQuery( internal.automatedAgentBuilder.getBuildSessionInternal, { + sessionId: args.sessionId, + } ); + + if ( !session ) { + throw new Error( "Build session not found" ); + } + + // Add user message to history + const updatedHistory = [ + ...session.conversationHistory, + { + role: "user" as const, + content: args.userResponse, + timestamp: Date.now(), + }, + ]; + + // Gate: enforce tier-based Bedrock access + const { requireBedrockAccess } = await import( "./lib/bedrockGate" ); + const modelId = process.env.AGENT_BUILDER_MODEL_ID || "anthropic.claude-haiku-4-5-20251001-v1:0"; + const gateResult = await requireBedrockAccess( + ctx, + modelId, + async ( lookupArgs ) => ctx.runQuery( internal.users.getInternal, lookupArgs ), + ); + if ( !gateResult.allowed ) { + throw new Error( gateResult.reason ); + } + + // Use Claude Haiku 4.5 with interleaved thinking to analyze and ask next question + const systemPrompt = buildSystemPrompt( session.agentRequirements ); + const response = await analyzeAndAskNext( systemPrompt, updatedHistory ); + + // Meter token usage for billing + if ( response.tokenUsage && gateResult.allowed ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateResult.userId as any, + modelId, + inputTokens: response.tokenUsage.inputTokens, + outputTokens: response.tokenUsage.outputTokens, + } ); + } + + // Parse response to extract: + // 1. Thinking/reasoning + // 2. Updated requirements + // 3. Next question OR ready to generate + const { thinking, requirements, nextQuestion, readyToGenerate, agentConfig } = response; + + // Update session + await ctx.runMutation( internal.automatedAgentBuilder.updateBuildSession, { + sessionId: args.sessionId, + conversationHistory: [ + ...updatedHistory, + { + role: "assistant", + content: nextQuestion || "Ready to generate your agent!", + reasoning: thinking, + timestamp: Date.now(), + }, + ], + agentRequirements: requirements ?? session.agentRequirements, + currentQuestion: session.currentQuestion + 1, + status: readyToGenerate ? "ready_to_generate" : "gathering_requirements", + generatedAgentConfig: readyToGenerate ? agentConfig : undefined, + } ); + + return { + thinking, + nextQuestion, + readyToGenerate, + agentConfig, + requirements, + }; + }, +} ); + +/** + * Internal query to get build session + */ +export const getBuildSessionInternal = internalQuery( { + args: { + sessionId: v.id( "agentBuildSessions" ), + }, + handler: async ( ctx, args ) => { + return await ctx.db.get( args.sessionId ); + }, +} ); + +/** + * Internal mutation to update build session + */ +export const updateBuildSession = internalMutation( { + args: { + sessionId: v.id( "agentBuildSessions" ), + conversationHistory: v.any(), + agentRequirements: v.any(), + currentQuestion: v.number(), + status: v.string(), + generatedAgentConfig: v.optional( v.any() ), + }, + handler: async ( ctx, args ) => { + await ctx.db.patch( args.sessionId, { + conversationHistory: args.conversationHistory, + agentRequirements: args.agentRequirements, + currentQuestion: args.currentQuestion, + status: args.status, + generatedAgentConfig: args.generatedAgentConfig, + updatedAt: Date.now(), + } ); + }, +} ); + +/** + * Build system prompt for current question + */ +function buildSystemPrompt( requirements: any ): string { + return `You are an intelligent agent builder assistant. Your goal is to gather requirements to build the perfect agent. + +CURRENT REQUIREMENTS: +${JSON.stringify( requirements, null, 2 )} + +WOZ-STYLE QUESTIONS (ask in order, skip if already answered): +1. What kind of agent? (Suggest: Customer Support, Code Reviewer, Research Assistant, Data Analyst, Content Creator, Compliance Consultant) +2. Who will use it? (Infer when obvious, e.g., code review → developers) +3. What problems does it solve? (Be specific, provide relevant suggestions) +4. What tools needed? (Suggest: Web search, GitHub, Database, Code execution, File ops, Email, API integrations) +5. What tone/style? (Suggest: Professional, Friendly, Technical, Creative, Formal) +6. How to test? (Suggest: Local Ollama FREE unlimited, Cloud Bedrock 50/month free, or Both) +7. Domain-specific knowledge needed? (Suggest: FedRAMP docs, coding standards, industry regulations, product docs) +8. Need knowledge base? (Suggest: Upload docs, provide URLs, scrape website, use existing sources) + +YOUR WORKFLOW: +1. THINK deeply about the user's response +2. Update your understanding of requirements +3. Determine if you have enough info to generate the agent +4. If ready: Provide complete agent config +5. If not ready: Ask the MOST IMPORTANT unanswered question with 3-5 relevant suggestions + +INTELLIGENT BEHAVIOR: +✅ Use interleaved reasoning (THINK tool) before each response +✅ Infer answers from context when obvious +✅ Skip questions you can confidently answer yourself +✅ Provide 3-5 specific, relevant suggestions with each question +✅ Know when you have enough information +✅ Be conversational and friendly + +OUTPUT FORMAT: +If ready to generate, output JSON: +{ + "readyToGenerate": true, + "agentConfig": { + "name": "Agent Name", + "model": "model-id", + "systemPrompt": "detailed prompt", + "tools": [...], + "deploymentType": "aws" | "ollama", + "tone": "...", + "domainKnowledge": "...", + "knowledgeBase": {...} + } +} + +If not ready, output JSON: +{ + "readyToGenerate": false, + "nextQuestion": "Your question here?", + "suggestions": ["Suggestion 1", "Suggestion 2", ...] +} + +Think deeply, ask smart questions, and build exceptional agents.`; +} + +/** + * Use Bedrock to analyze and ask next question + * Model is configurable via AGENT_BUILDER_MODEL_ID env var + */ +async function analyzeAndAskNext( + systemPrompt: string, + conversationHistory: any[] +): Promise<{ + thinking: string; + requirements: any; + nextQuestion: string | null; + readyToGenerate: boolean; + agentConfig: any | null; + tokenUsage: { inputTokens: number; outputTokens: number; totalTokens: number }; +}> { + const { BedrockRuntimeClient, InvokeModelCommand } = await import( "@aws-sdk/client-bedrock-runtime" ); + + const accessKeyId = process.env.AWS_ACCESS_KEY_ID; + const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY; + const region = process.env.AWS_REGION || "us-east-1"; + + if ( !accessKeyId || !secretAccessKey ) { + throw new Error( "Missing AWS credentials: ensure AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are set in the environment" ); + } + + const client = new BedrockRuntimeClient( { + region, + credentials: { + accessKeyId, + secretAccessKey, + }, + } ); + + // Build messages in Bedrock format + const messages = conversationHistory.map( ( msg ) => ( { + role: msg.role as "user" | "assistant", + content: [{ type: "text", text: msg.content }], + } ) ); + + const modelId = process.env.AGENT_BUILDER_MODEL_ID || "anthropic.claude-haiku-4-5-20251001-v1:0"; + + const payload = { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 4096, + // Use lower temperature to produce more stable structured JSON responses + temperature: 0.5, + system: systemPrompt, + messages, + }; + + const command = new InvokeModelCommand( { + modelId, + contentType: "application/json", + accept: "application/json", + body: JSON.stringify( payload ), + } ); + + let responseBody: any; + try { + const response: any = await client.send( command ); + const decoded = new TextDecoder().decode( response.body ); + try { + responseBody = JSON.parse( decoded ); + } catch ( parseErr: any ) { + console.error( "Failed to parse Bedrock response body", { modelId, error: parseErr.message, responseLength: decoded.length, responsePreview: decoded.slice( 0, 100 ) + ( decoded.length > 100 ? "..." : "" ) } ); + throw new Error( `Failed to parse Bedrock response body: ${parseErr.message}` ); + } + } catch ( err: any ) { + console.error( "Bedrock model invocation failed", { modelId, err } ); + throw new Error( `Bedrock model invocation failed: ${err.message}` ); + } + + // Extract token usage for billing + const { extractTokenUsage, estimateTokenUsage } = await import( "./lib/tokenBilling" ); + let tokenUsage = extractTokenUsage( responseBody, modelId ); + + // Extract text response + let textResponse = ""; + for ( const block of responseBody.content || [] ) { + if ( block.type === "text" ) { + textResponse += block.text; + } + } + + // Fallback to character-based estimation if provider did not return token counts + if ( tokenUsage.totalTokens === 0 ) { + const inputText = systemPrompt + conversationHistory.map( ( m ) => m.content ).join( " " ); + tokenUsage = estimateTokenUsage( inputText, textResponse ); + } + + // Parse response - AI should return structured JSON + try { + const parsed = JSON.parse( textResponse ); + + if ( parsed.readyToGenerate ) { + return { + thinking: parsed.reasoning || "Agent requirements gathered successfully", + requirements: parsed.agentConfig, + nextQuestion: null, + readyToGenerate: true, + agentConfig: parsed.agentConfig, + tokenUsage, + }; + } else { + const suggestions = parsed.suggestions || []; + const formattedQuestion = suggestions.length > 0 + ? `${parsed.nextQuestion}\n\nSuggestions:\n${suggestions.map( ( s: string, i: number ) => `${i + 1}. ${s}` ).join( "\n" )}` + : parsed.nextQuestion; + + return { + thinking: parsed.reasoning || "Analyzing requirements...", + requirements: parsed.requirements || parsed.partialConfig || {}, + nextQuestion: formattedQuestion, + readyToGenerate: false, + agentConfig: null, + tokenUsage, + }; + } + } catch ( error ) { + // Fallback if JSON parsing fails - treat as next question + return { + thinking: "Processing user input...", + requirements: null, + nextQuestion: textResponse, + readyToGenerate: false, + agentConfig: null, + tokenUsage, + }; + } +} + +/** + * Generate agent from build session + */ +export const generateAgentFromSession = action( { + args: { + sessionId: v.id( "agentBuildSessions" ), + }, + handler: async ( ctx, args ): Promise<{ + success: boolean; + agentId: any; + generatedCode: string; + requirementsTxt: string | null; + mcpConfig: string | null; + }> => { + // Verify caller owns this session + const identity = await ctx.auth.getUserIdentity(); + if ( !identity ) { + throw new Error( "Authentication required" ); + } + + const session = await ctx.runQuery( internal.automatedAgentBuilder.getBuildSessionInternal, { + sessionId: args.sessionId, + } ); + + if ( !session || session.status !== "ready_to_generate" || !session.generatedAgentConfig ) { + throw new Error( "Session not ready to generate agent" ); + } + + const config = session.generatedAgentConfig; + + // Generate agent code using codeGenerator + const result = await ctx.runAction( api.codeGenerator.generateAgent, { + name: config.name, + model: config.model, + systemPrompt: config.systemPrompt, + tools: config.tools || [], + deploymentType: config.deploymentType || "aws", + mcpServers: config.mcpServers, + dynamicTools: config.dynamicTools, + } ); + + // Create agent in database + const agentId: any = await ctx.runMutation( api.agents.create, { + name: config.name, + description: `AI-generated agent: ${config.name}`, + model: config.model, + systemPrompt: config.systemPrompt, + tools: config.tools || [], + generatedCode: result.generatedCode, + dockerConfig: "", // Will be generated if needed + deploymentType: config.deploymentType || "aws", + isPublic: false, + exposableAsMCPTool: false, + mcpToolName: "", + mcpInputSchema: undefined, + } ); + + // Update session as completed + await ctx.runMutation( internal.automatedAgentBuilder.updateBuildSession, { + sessionId: args.sessionId, + conversationHistory: session.conversationHistory, + agentRequirements: session.agentRequirements, + currentQuestion: session.currentQuestion, + status: "completed", + generatedAgentConfig: { ...config, agentId }, + } ); + + return { + success: true, + agentId, + generatedCode: result.generatedCode, + requirementsTxt: result.requirementsTxt, + mcpConfig: result.mcpConfig, + }; + }, +} ); diff --git a/convex/awsDeployment.ts b/convex/awsDeployment.ts index 70c36f6..828c2d3 100644 --- a/convex/awsDeployment.ts +++ b/convex/awsDeployment.ts @@ -7,6 +7,12 @@ import { action, internalAction, mutation, query, internalMutation, internalQuery } from "./_generated/server"; import { v } from "convex/values"; import { internal, api } from "./_generated/api"; + +// Stripe mutations live in stripeMutations.ts. Cast bridges codegen gap. + +const internalStripeMutations = ( internal as any ).stripeMutations; +// Direct import for mutation handlers (mutations cannot call ctx.runMutation) +import { incrementUsageAndReportOverageImpl } from "./stripeMutations"; import { getAuthUserId } from "@convex-dev/auth/server"; import { assembleDeploymentPackageFiles } from "./deploymentPackageGenerator"; import { sanitizeAgentName } from "./constants"; @@ -15,109 +21,121 @@ import { sanitizeAgentName } from "./constants"; * Deploy agent - Routes to correct tier (Tier 1/2/3) * This is the main entry point that replaces the old deployToAWS */ -export const deployToAWS = action({ +export const deployToAWS = action( { args: { - agentId: v.id("agents"), - deploymentConfig: v.object({ + agentId: v.id( "agents" ), + deploymentConfig: v.object( { region: v.string(), agentName: v.string(), - description: v.optional(v.string()), - enableMonitoring: v.optional(v.boolean()), - enableAutoScaling: v.optional(v.boolean()), - }), + description: v.optional( v.string() ), + enableMonitoring: v.optional( v.boolean() ), + enableAutoScaling: v.optional( v.boolean() ), + } ), // Optional: Provide AWS credentials directly (for anonymous users) - awsCredentials: v.optional(v.object({ + awsCredentials: v.optional( v.object( { accessKeyId: v.string(), secretAccessKey: v.string(), - roleArn: v.optional(v.string()), - })), + roleArn: v.optional( v.string() ), + } ) ), }, - handler: async (ctx, args): Promise => { + handler: async ( ctx, args ): Promise => { // Get user ID (can be anonymous) - const userId = await getAuthUserId(ctx); + const userId = await getAuthUserId( ctx ); // Get agent - const agent: any = await ctx.runQuery(internal.agents.getInternal, { + const agent: any = await ctx.runQuery( internal.agents.getInternal, { id: args.agentId - }); + } ); - if (!agent) { - throw new Error("Agent not found"); + if ( !agent ) { + throw new Error( "Agent not found" ); } // Verify ownership (allow anonymous users to deploy their own agents) - if (userId && agent.createdBy !== userId) { - throw new Error("Not authorized to deploy this agent"); + if ( userId && agent.createdBy !== userId ) { + throw new Error( "Not authorized to deploy this agent" ); } // Get user tier (default to freemium for anonymous users) - const user = userId ? await ctx.runQuery(internal.awsDeployment.getUserTierInternal, { + const user = userId ? await ctx.runQuery( internal.awsDeployment.getUserTierInternal, { userId: userId, - }) : null; + } ) : null; const tier = user?.tier || "freemium"; + // PROVIDER GATING: Freemium users cannot deploy to Bedrock (all AWS deployments use Bedrock) + const { isProviderAllowedForTier } = await import( "./lib/tierConfig" ); + if ( !isProviderAllowedForTier( tier, "bedrock" ) ) { + throw new Error( + "Free tier cannot deploy to AWS Bedrock. " + + "Upgrade to Personal ($5/month) for Bedrock access, " + + "or use Ollama models for unlimited FREE local testing." + ); + } + // Check if user provided AWS credentials directly (for anonymous/one-time deployment) - if (args.awsCredentials) { + if ( args.awsCredentials ) { // TODO: Implement direct credential deployment - throw new Error("Direct AWS credential deployment not yet implemented. Please save your AWS credentials in settings first."); + throw new Error( "Direct AWS credential deployment not yet implemented. Please save your AWS credentials in settings first." ); } // Check if user has AWS credentials configured (saved) - const hasAWSCreds = userId ? await ctx.runQuery(api.awsAuth.hasValidAWSCredentials) : false; + const hasAWSCreds = userId ? await ctx.runQuery( api.awsAuth.hasValidAWSCredentials ) : false; // If user has saved AWS credentials, deploy to THEIR account (Tier 2) - if (hasAWSCreds && userId) { - return await deployTier2(ctx, args, userId); + if ( hasAWSCreds && userId ) { + return await deployTier2( ctx, args, userId ); } // Otherwise, use platform deployment (Tier 1) - if (tier === "freemium") { + if ( tier === "freemium" ) { // Anonymous users must provide AWS credentials - if (!userId) { - throw new Error("Anonymous users must provide AWS credentials or sign in to use the platform."); + if ( !userId ) { + throw new Error( "Anonymous users must provide AWS credentials or sign in to use the platform." ); } - // Tier 1: Check usage limits - const testsThisMonth = user?.testsThisMonth || 0; - if (testsThisMonth >= 10) { - throw new Error("Free tier limit reached (10 tests/month). Configure AWS credentials to deploy to your own account!"); + // Tier 1: Check usage limits using centralized tier config + const executionsThisMonth = user?.executionsThisMonth || 0; + const { getTierConfig: getFreeTierCfg } = await import( "./lib/tierConfig" ); + const freeLimits = getFreeTierCfg( "freemium" ); + if ( executionsThisMonth >= freeLimits.monthlyExecutions ) { + throw new Error( `Free tier limit reached (${freeLimits.monthlyExecutions} executions/month). Configure AWS credentials to deploy to your own account!` ); } // Deploy to platform Fargate - return await deployTier1(ctx, args, userId); - } else if (tier === "enterprise") { + return await deployTier1( ctx, args, userId ); + } else if ( tier === "enterprise" ) { // Tier 3: Enterprise SSO (not implemented yet) - throw new Error("Enterprise tier not yet implemented"); + throw new Error( "Enterprise tier not yet implemented" ); } // Fallback to Tier 1 - requires authentication - if (!userId) { - throw new Error("Authentication required for deployment."); + if ( !userId ) { + throw new Error( "Authentication required for deployment." ); } - return await deployTier1(ctx, args, userId); + return await deployTier1( ctx, args, userId ); }, -}); +} ); /** * Execute the actual deployment */ -export const executeDeployment = internalAction({ +export const executeDeployment = internalAction( { args: { - deploymentId: v.id("deployments"), - agentId: v.id("agents"), - config: v.object({ + deploymentId: v.id( "deployments" ), + agentId: v.id( "agents" ), + config: v.object( { region: v.string(), agentName: v.string(), - description: v.optional(v.string()), - enableMonitoring: v.optional(v.boolean()), - enableAutoScaling: v.optional(v.boolean()), - }), + description: v.optional( v.string() ), + enableMonitoring: v.optional( v.boolean() ), + enableAutoScaling: v.optional( v.boolean() ), + } ), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { try { // Update status to building with progress tracking - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "BUILDING", progress: { @@ -127,22 +145,22 @@ export const executeDeployment = internalAction({ currentStep: "Building container", totalSteps: 5, }, - }); + } ); // Get agent details - const agent = await ctx.runQuery(internal.agents.getInternal, { + const agent = await ctx.runQuery( internal.agents.getInternal, { id: args.agentId - }); + } ); - if (!agent) { - throw new Error("Agent not found"); + if ( !agent ) { + throw new Error( "Agent not found" ); } // Generate deployment artifacts - const artifacts = await generateDeploymentArtifacts(agent, args.config); + const artifacts = await generateDeploymentArtifacts( agent, args.config ); // Update status to deploying with progress - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "DEPLOYING", progress: { @@ -152,13 +170,13 @@ export const executeDeployment = internalAction({ currentStep: "Deploying to AWS", totalSteps: 5, }, - }); + } ); // Deploy to AWS using AgentCore CLI - const deploymentResult = await deployToAgentCore(artifacts, args.config); + const deploymentResult = await deployToAgentCore( artifacts, args.config ); // Update status to completed with final progress - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "COMPLETED", progress: { @@ -168,13 +186,13 @@ export const executeDeployment = internalAction({ currentStep: "Completed", totalSteps: 5, }, - }); + } ); return deploymentResult; - } catch (error: any) { + } catch ( error: any ) { // Update status to failed with error details - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "FAILED", progress: { @@ -183,41 +201,41 @@ export const executeDeployment = internalAction({ message: `Deployment failed: ${error.message}`, }, error: error.message, - }); + } ); throw error; } }, -}); +} ); // Removed: Use createDeploymentInternal instead /** * Update deployment status with enhanced progress tracking */ -export const updateDeploymentStatus = mutation({ +export const updateDeploymentStatus = mutation( { args: { - deploymentId: v.id("deployments"), + deploymentId: v.id( "deployments" ), status: v.string(), - message: v.optional(v.string()), - result: v.optional(v.any()), - error: v.optional(v.string()), - progress: v.optional(v.number()), // 0-100 - currentStep: v.optional(v.string()), - totalSteps: v.optional(v.number()), - stepDetails: v.optional(v.object({ + message: v.optional( v.string() ), + result: v.optional( v.any() ), + error: v.optional( v.string() ), + progress: v.optional( v.number() ), // 0-100 + currentStep: v.optional( v.string() ), + totalSteps: v.optional( v.number() ), + stepDetails: v.optional( v.object( { stepName: v.string(), stepIndex: v.number(), totalSteps: v.number(), stepStatus: v.string(), // "running", "completed", "failed" - stepMessage: v.optional(v.string()), - estimatedTimeRemaining: v.optional(v.number()), // seconds - })), + stepMessage: v.optional( v.string() ), + estimatedTimeRemaining: v.optional( v.number() ), // seconds + } ) ), }, - handler: async (ctx, args) => { - const deployment = await ctx.db.get(args.deploymentId); - if (!deployment) { - throw new Error("Deployment not found"); + handler: async ( ctx, args ) => { + const deployment = await ctx.db.get( args.deploymentId ); + if ( !deployment ) { + throw new Error( "Deployment not found" ); } const updates: any = { @@ -227,32 +245,32 @@ export const updateDeploymentStatus = mutation({ // Note: message is not in schema, stored in logs instead - if (args.result) { + if ( args.result ) { updates.result = args.result; } - if (args.error) { + if ( args.error ) { updates.error = args.error; } - if (args.progress !== undefined) { - updates.progress = Math.max(0, Math.min(100, args.progress)); + if ( args.progress !== undefined ) { + updates.progress = Math.max( 0, Math.min( 100, args.progress ) ); } - if (args.currentStep) { + if ( args.currentStep ) { updates.currentStep = args.currentStep; } - if (args.totalSteps) { - (updates as any).totalSteps = args.totalSteps; + if ( args.totalSteps ) { + ( updates ).totalSteps = args.totalSteps; } - if (args.stepDetails) { - (updates as any).stepDetails = args.stepDetails; + if ( args.stepDetails ) { + ( updates ).stepDetails = args.stepDetails; } // Add log entry for status changes - const existingLogs: any[] = Array.isArray(deployment.logs) ? deployment.logs : []; + const existingLogs: any[] = Array.isArray( deployment.logs ) ? deployment.logs : []; const newLogEntry = { timestamp: Date.now(), level: args.status === "FAILED" ? "error" : "info", @@ -263,40 +281,40 @@ export const updateDeploymentStatus = mutation({ updates.logs = [...existingLogs, newLogEntry] as any; // Set completion timestamp - if (args.status === "COMPLETED" || args.status === "FAILED") { + if ( args.status === "COMPLETED" || args.status === "FAILED" ) { updates.completedAt = Date.now(); updates.progress = args.status === "COMPLETED" ? 100 : updates.progress; } // Calculate deployment duration - if (deployment.createdAt) { - (updates as any).duration = Date.now() - deployment.createdAt; + if ( deployment.createdAt ) { + ( updates ).duration = Date.now() - deployment.createdAt; } - await ctx.db.patch(args.deploymentId, updates); + await ctx.db.patch( args.deploymentId, updates ); // Return updated deployment for real-time updates - return await ctx.db.get(args.deploymentId); + return await ctx.db.get( args.deploymentId ); }, -}); +} ); /** * Add deployment log entry */ -export const addDeploymentLog = mutation({ +export const addDeploymentLog = mutation( { args: { - deploymentId: v.id("deployments"), + deploymentId: v.id( "deployments" ), level: v.string(), // "info", "warn", "error", "debug" message: v.string(), - details: v.optional(v.any()), + details: v.optional( v.any() ), }, - handler: async (ctx, args) => { - const deployment = await ctx.db.get(args.deploymentId); - if (!deployment) { - throw new Error("Deployment not found"); + handler: async ( ctx, args ) => { + const deployment = await ctx.db.get( args.deploymentId ); + if ( !deployment ) { + throw new Error( "Deployment not found" ); } - const existingLogs: any[] = Array.isArray(deployment.logs) ? deployment.logs : []; + const existingLogs: any[] = Array.isArray( deployment.logs ) ? deployment.logs : []; const newLogEntry = { timestamp: Date.now(), level: args.level, @@ -304,37 +322,37 @@ export const addDeploymentLog = mutation({ source: "manual", }; - await ctx.db.patch(args.deploymentId, { + await ctx.db.patch( args.deploymentId, { logs: [...existingLogs, newLogEntry] as any, updatedAt: Date.now(), - }); + } ); }, -}); +} ); /** * Get deployment with real-time status */ -export const getDeploymentWithLogs = query({ - args: { deploymentId: v.id("deployments") }, - handler: async (ctx, args) => { - const userId = await getAuthUserId(ctx); - if (!userId) { +export const getDeploymentWithLogs = query( { + args: { deploymentId: v.id( "deployments" ) }, + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { return null; } - const deployment = await ctx.db.get(args.deploymentId); - if (!deployment || deployment.userId !== userId) { + const deployment = await ctx.db.get( args.deploymentId ); + if ( !deployment || deployment.userId !== userId ) { return null; } // Calculate additional metrics const now = Date.now(); const elapsed = deployment.createdAt ? now - deployment.createdAt : 0; - const isActive = !["COMPLETED", "FAILED", "CANCELLED"].includes(deployment.status); + const isActive = !["COMPLETED", "FAILED", "CANCELLED"].includes( deployment.status ); // Estimate remaining time based on current progress let estimatedTimeRemaining = null; - if (isActive && deployment.progress && deployment.progress.percentage > 0) { + if ( isActive && deployment.progress && deployment.progress.percentage > 0 ) { const progressRate = deployment.progress.percentage / elapsed; const remainingProgress = 100 - deployment.progress.percentage; estimatedTimeRemaining = remainingProgress / progressRate; @@ -345,85 +363,85 @@ export const getDeploymentWithLogs = query({ elapsed, isActive, estimatedTimeRemaining, - formattedDuration: formatDuration(elapsed), + formattedDuration: formatDuration( elapsed ), progressPercentage: deployment.progress?.percentage || 0, }; }, -}); +} ); /** * List user deployments with pagination and filtering */ -export const listUserDeployments = query({ +export const listUserDeployments = query( { args: { - limit: v.optional(v.number()), - status: v.optional(v.string()), - agentId: v.optional(v.id("agents")), + limit: v.optional( v.number() ), + status: v.optional( v.string() ), + agentId: v.optional( v.id( "agents" ) ), }, - handler: async (ctx, args) => { - const userId = await getAuthUserId(ctx); - if (!userId) { + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { return []; } const baseQuery = ctx.db - .query("deployments") - .withIndex("by_user", (q) => q.eq("userId", userId)) - .order("desc"); + .query( "deployments" ) + .withIndex( "by_user", ( q ) => q.eq( "userId", userId ) ) + .order( "desc" ); const deployments = args.limit - ? await baseQuery.take(args.limit) + ? await baseQuery.take( args.limit ) : await baseQuery.collect(); // Filter by status if specified let filteredDeployments = deployments; - if (args.status) { - filteredDeployments = deployments.filter(d => d.status === args.status); + if ( args.status ) { + filteredDeployments = deployments.filter( d => d.status === args.status ); } // Filter by agent if specified - if (args.agentId) { - filteredDeployments = filteredDeployments.filter(d => d.agentId === args.agentId); + if ( args.agentId ) { + filteredDeployments = filteredDeployments.filter( d => d.agentId === args.agentId ); } // Add computed fields - return filteredDeployments.map(deployment => { + return filteredDeployments.map( deployment => { const elapsed = deployment.createdAt ? Date.now() - deployment.createdAt : 0; - const isActive = !["COMPLETED", "FAILED", "CANCELLED"].includes(deployment.status); + const isActive = !["COMPLETED", "FAILED", "CANCELLED"].includes( deployment.status ); return { ...deployment, elapsed, isActive, - formattedDuration: formatDuration(elapsed), + formattedDuration: formatDuration( elapsed ), progressPercentage: deployment.progress?.percentage || 0, }; - }); + } ); }, -}); +} ); /** * Cancel active deployment */ -export const cancelDeployment = mutation({ - args: { deploymentId: v.id("deployments") }, - handler: async (ctx, args) => { - const userId = await getAuthUserId(ctx); - if (!userId) { - throw new Error("Not authenticated"); +export const cancelDeployment = mutation( { + args: { deploymentId: v.id( "deployments" ) }, + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + throw new Error( "Not authenticated" ); } - const deployment = await ctx.db.get(args.deploymentId); - if (!deployment || deployment.userId !== userId) { - throw new Error("Deployment not found or not authorized"); + const deployment = await ctx.db.get( args.deploymentId ); + if ( !deployment || deployment.userId !== userId ) { + throw new Error( "Deployment not found or not authorized" ); } - if (["COMPLETED", "FAILED", "CANCELLED"].includes(deployment.status)) { - throw new Error("Cannot cancel completed deployment"); + if ( ["COMPLETED", "FAILED", "CANCELLED"].includes( deployment.status ) ) { + throw new Error( "Cannot cancel completed deployment" ); } - const existingLogs = Array.isArray(deployment.logs) ? deployment.logs : []; - await ctx.db.patch(args.deploymentId, { + const existingLogs = Array.isArray( deployment.logs ) ? deployment.logs : []; + await ctx.db.patch( args.deploymentId, { status: "CANCELLED", completedAt: Date.now(), updatedAt: Date.now(), @@ -433,21 +451,21 @@ export const cancelDeployment = mutation({ message: "Deployment cancelled by user", source: "user", }], - }); + } ); return { success: true }; }, -}); +} ); // Helper function to format duration -function formatDuration(milliseconds: number): string { - const seconds = Math.floor(milliseconds / 1000); - const minutes = Math.floor(seconds / 60); - const hours = Math.floor(minutes / 60); +function formatDuration( milliseconds: number ): string { + const seconds = Math.floor( milliseconds / 1000 ); + const minutes = Math.floor( seconds / 60 ); + const hours = Math.floor( minutes / 60 ); - if (hours > 0) { + if ( hours > 0 ) { return `${hours}h ${minutes % 60}m ${seconds % 60}s`; - } else if (minutes > 0) { + } else if ( minutes > 0 ) { return `${minutes}m ${seconds % 60}s`; } else { return `${seconds}s`; @@ -457,58 +475,58 @@ function formatDuration(milliseconds: number): string { /** * Get deployment status */ -export const getDeployment = query({ - args: { deploymentId: v.id("deployments") }, - handler: async (ctx, args) => { - const userId = await getAuthUserId(ctx); - if (!userId) { +export const getDeployment = query( { + args: { deploymentId: v.id( "deployments" ) }, + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { return null; } - const deployment = await ctx.db.get(args.deploymentId); - if (!deployment || deployment.userId !== userId) { + const deployment = await ctx.db.get( args.deploymentId ); + if ( !deployment || deployment.userId !== userId ) { return null; } return deployment; }, -}); +} ); /** * Get user's deployments */ -export const getUserDeployments = query({ - args: { limit: v.optional(v.number()) }, - handler: async (ctx, args) => { - const userId = await getAuthUserId(ctx); - if (!userId) { +export const getUserDeployments = query( { + args: { limit: v.optional( v.number() ) }, + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { return []; } - const limit = Math.min(args.limit || 20, 100); + const limit = Math.min( args.limit || 20, 100 ); return await ctx.db - .query("deployments") - .withIndex("by_user", (q) => q.eq("userId", userId)) - .order("desc") - .take(limit); + .query( "deployments" ) + .withIndex( "by_user", ( q ) => q.eq( "userId", userId ) ) + .order( "desc" ) + .take( limit ); }, -}); +} ); // Helper Functions -async function generateDeploymentArtifacts(agent: any, config: any) { +async function generateDeploymentArtifacts( agent: any, config: any ) { // Generate AgentCore-compatible agent code - const agentCode = generateAgentCoreCode(agent); + const agentCode = generateAgentCoreCode( agent ); // Generate requirements.txt - const requirements = generateAgentCoreRequirements(agent.tools); + const requirements = generateAgentCoreRequirements( agent.tools ); // Generate Dockerfile - const dockerfile = generateAgentCoreDockerfile(); + const dockerfile = generateAgentCoreDockerfile( agent ); // Generate AgentCore configuration - const agentCoreConfig = generateAgentCoreConfig(agent, config); + const agentCoreConfig = generateAgentCoreConfig( agent, config ); return { agentCode, @@ -520,14 +538,14 @@ async function generateDeploymentArtifacts(agent: any, config: any) { }; } -function generateAgentCoreCode(agent: any): string { +function generateAgentCoreCode( agent: any ): string { // Generate tool imports based on agent tools const toolImports = agent.tools && agent.tools.length > 0 - ? agent.tools.map((tool: any) => `from strands_tools import ${tool.name}`).join('\n') + ? agent.tools.map( ( tool: any ) => `from strands_tools import ${tool.name}` ).join( '\n' ) : '# No tools configured'; - + const toolsList = agent.tools && agent.tools.length > 0 - ? agent.tools.map((tool: any) => tool.name).join(', ') + ? agent.tools.map( ( tool: any ) => tool.name ).join( ', ' ) : ''; return `""" @@ -556,22 +574,22 @@ app = BedrockAgentCoreApp() async def agent_invocation(payload, context): """ Handler for agent invocation with streaming support - + Args: payload: Input payload with 'prompt' key context: AgentCore runtime context - + Yields: Streaming events from agent execution """ user_message = payload.get("prompt", "No prompt provided") - + print(f"[${agent.name}] Processing: {user_message}") print(f"Context: {context}") - + # Stream agent responses agent_stream = agent.stream_async(user_message) - + async for event in agent_stream: yield event @@ -580,58 +598,65 @@ if __name__ == "__main__": `; } -function generateAgentCoreRequirements(tools: any[]): string { - const packages = new Set([ +function generateAgentCoreRequirements( tools: any[] ): string { + const packages = new Set( [ "strands-agents>=1.0.0", "bedrock-agentcore>=0.1.6", "bedrock-agentcore-starter-toolkit>=0.1.25", "boto3>=1.28.0", "pyjwt>=2.8.0", - ]); + ] ); // Add tool-specific packages - tools.forEach(tool => { - if (tool.requiresPip && tool.pipPackages) { - tool.pipPackages.forEach((pkg: string) => packages.add(pkg)); + tools.forEach( tool => { + if ( tool.requiresPip && tool.pipPackages ) { + tool.pipPackages.forEach( ( pkg: string ) => packages.add( pkg ) ); } - }); + } ); - return Array.from(packages).join("\\n"); + return Array.from( packages ).join( String.raw`\n` ); } -function generateAgentCoreDockerfile(): string { - return `FROM python:3.11-slim +function generateAgentCoreDockerfile( agent: any ): string { + const isOllamaModel = typeof agent.model === "string" && !agent.model.includes( "." ); + + if ( isOllamaModel ) { + // Validate model name to prevent shell injection in entrypoint.sh + const safeModelPattern = /^[A-Za-z0-9._:/-]+$/; + const modelName = safeModelPattern.test( agent.model ) ? agent.model : "llama3:latest"; + + return `FROM ollama/ollama:latest -# System dependencies -RUN apt-get update && apt-get install -y \\ - gcc \\ - g++ \\ - curl \\ - && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y python3.11 python3-pip curl && rm -rf /var/lib/apt/lists/* -# Create app directory WORKDIR /app +COPY requirements.txt agent.py ./ +RUN pip3 install --no-cache-dir -r requirements.txt -# Copy and install requirements -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt +RUN echo '#!/bin/bash\nollama serve &\nsleep 5\nollama pull ${modelName}\npython3 agent.py' > /app/entrypoint.sh && chmod +x /app/entrypoint.sh -# Copy agent code -COPY agent.py . +EXPOSE 8080 11434 +ENTRYPOINT ["/app/entrypoint.sh"] +`; + } + + return `FROM python:3.11-slim + +RUN apt-get update && apt-get install -y gcc g++ curl && rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY requirements.txt agent.py ./ +RUN pip install --no-cache-dir -r requirements.txt -# Create non-root user RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app USER appuser -# Expose port for AgentCore Runtime EXPOSE 8080 - -# Run AgentCore agent CMD ["python", "agent.py"] `; } -function generateAgentCoreConfig(agent: any, config: any) { +function generateAgentCoreConfig( agent: any, config: any ) { return { name: config.agentName, description: config.description || agent.description, @@ -656,7 +681,7 @@ function generateAgentCoreConfig(agent: any, config: any) { }; } -async function deployToAgentCore(artifacts: any, config: any) { +async function deployToAgentCore( artifacts: any, config: any ) { // This would use the AgentCore CLI or SDK to deploy // For now, return a mock successful deployment return { @@ -675,26 +700,26 @@ async function deployToAgentCore(artifacts: any, config: any) { /** * Create deployment record (internal) */ -export const createDeploymentInternal = internalMutation({ +export const createDeploymentInternal = internalMutation( { args: { - agentId: v.id("agents"), - userId: v.union(v.id("users"), v.string()), - tier: v.optional(v.string()), - deploymentConfig: v.object({ + agentId: v.id( "agents" ), + userId: v.union( v.id( "users" ), v.string() ), + tier: v.optional( v.string() ), + deploymentConfig: v.object( { region: v.string(), agentName: v.string(), - description: v.optional(v.string()), - enableMonitoring: v.optional(v.boolean()), - enableAutoScaling: v.optional(v.boolean()), - }), + description: v.optional( v.string() ), + enableMonitoring: v.optional( v.boolean() ), + enableAutoScaling: v.optional( v.boolean() ), + } ), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { // Ensure userId is a proper Id<"users"> - const userId = typeof args.userId === 'string' && args.userId.startsWith('j') + const userId = typeof args.userId === 'string' && args.userId.startsWith( 'j' ) ? args.userId as any : args.userId; - return await ctx.db.insert("deployments", { + return await ctx.db.insert( "deployments", { agentId: args.agentId, userId: userId, tier: args.tier || "freemium", @@ -715,148 +740,148 @@ export const createDeploymentInternal = internalMutation({ updatedAt: Date.now(), startedAt: Date.now(), isActive: true, - }); + } ); }, -}); +} ); /** * Update deployment status (internal) */ -export const updateDeploymentStatusInternal = internalMutation({ +export const updateDeploymentStatusInternal = internalMutation( { args: { - deploymentId: v.id("deployments"), + deploymentId: v.id( "deployments" ), status: v.string(), - progress: v.optional(v.object({ + progress: v.optional( v.object( { stage: v.string(), percentage: v.number(), message: v.string(), - currentStep: v.optional(v.string()), - totalSteps: v.optional(v.number()), - })), - agentCoreRuntimeId: v.optional(v.string()), - agentCoreEndpoint: v.optional(v.string()), - cloudFormationStackId: v.optional(v.string()), - ecrRepositoryUri: v.optional(v.string()), - s3BucketName: v.optional(v.string()), - deploymentPackageKey: v.optional(v.string()), - awsAccountId: v.optional(v.string()), - awsCallerArn: v.optional(v.string()), - logs: v.optional(v.union( + currentStep: v.optional( v.string() ), + totalSteps: v.optional( v.number() ), + } ) ), + agentCoreRuntimeId: v.optional( v.string() ), + agentCoreEndpoint: v.optional( v.string() ), + cloudFormationStackId: v.optional( v.string() ), + ecrRepositoryUri: v.optional( v.string() ), + s3BucketName: v.optional( v.string() ), + deploymentPackageKey: v.optional( v.string() ), + awsAccountId: v.optional( v.string() ), + awsCallerArn: v.optional( v.string() ), + logs: v.optional( v.union( v.string(), - v.array(v.object({ + v.array( v.object( { timestamp: v.number(), level: v.string(), message: v.string(), - source: v.optional(v.string()), - })) - )), - error: v.optional(v.string()), + source: v.optional( v.string() ), + } ) ) + ) ), + error: v.optional( v.string() ), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { const updates: any = { status: args.status, updatedAt: Date.now(), }; - if (args.progress) { + if ( args.progress ) { updates.progress = args.progress; } - if (args.agentCoreRuntimeId) { + if ( args.agentCoreRuntimeId ) { updates.agentCoreRuntimeId = args.agentCoreRuntimeId; } - if (args.agentCoreEndpoint) { + if ( args.agentCoreEndpoint ) { updates.agentCoreEndpoint = args.agentCoreEndpoint; } - if (args.cloudFormationStackId) { + if ( args.cloudFormationStackId ) { updates.cloudFormationStackId = args.cloudFormationStackId; } - if (args.ecrRepositoryUri) { + if ( args.ecrRepositoryUri ) { updates.ecrRepositoryUri = args.ecrRepositoryUri; } - if (args.s3BucketName) { + if ( args.s3BucketName ) { updates.s3BucketName = args.s3BucketName; } - if (args.deploymentPackageKey) { + if ( args.deploymentPackageKey ) { updates.deploymentPackageKey = args.deploymentPackageKey; } - if (args.awsAccountId) { + if ( args.awsAccountId ) { updates.awsAccountId = args.awsAccountId; } - if (args.awsCallerArn) { + if ( args.awsCallerArn ) { updates.awsCallerArn = args.awsCallerArn; } - if (args.status === "ACTIVE") { + if ( args.status === "ACTIVE" ) { updates.deployedAt = Date.now(); updates.isActive = true; } - if (args.status === "FAILED" || args.status === "DELETED") { + if ( args.status === "FAILED" || args.status === "DELETED" ) { updates.isActive = false; } - if (args.status === "DELETED") { + if ( args.status === "DELETED" ) { updates.deletedAt = Date.now(); } - if (args.logs || args.progress?.message) { - const deployment = await ctx.db.get(args.deploymentId); - const existingLogs = Array.isArray(deployment?.logs) ? deployment.logs : []; + if ( args.logs || args.progress?.message ) { + const deployment = await ctx.db.get( args.deploymentId ); + const existingLogs = Array.isArray( deployment?.logs ) ? deployment.logs : []; const combinedLogs = [...existingLogs]; - if (args.logs) { - if (Array.isArray(args.logs)) { - combinedLogs.push(...args.logs); + if ( args.logs ) { + if ( Array.isArray( args.logs ) ) { + combinedLogs.push( ...args.logs ); } } - if (args.progress?.message) { - combinedLogs.push({ + if ( args.progress?.message ) { + combinedLogs.push( { timestamp: Date.now(), level: args.status === "FAILED" ? "error" : "info", message: args.progress.message, source: "deployment", - }); + } ); } updates.logs = combinedLogs; } - await ctx.db.patch(args.deploymentId, updates); + await ctx.db.patch( args.deploymentId, updates ); }, -}); +} ); /** * Get deployment (internal) */ -export const getDeploymentInternal = internalQuery({ - args: { deploymentId: v.id("deployments") }, - handler: async (ctx, args) => { - return await ctx.db.get(args.deploymentId); +export const getDeploymentInternal = internalQuery( { + args: { deploymentId: v.id( "deployments" ) }, + handler: async ( ctx, args ) => { + return await ctx.db.get( args.deploymentId ); }, -}); +} ); /** * Execute deployment (internal action) */ -export const executeDeploymentInternal = internalAction({ +export const executeDeploymentInternal = internalAction( { args: { - deploymentId: v.id("deployments"), - agentId: v.id("agents"), - userId: v.id("users"), + deploymentId: v.id( "deployments" ), + agentId: v.id( "agents" ), + userId: v.id( "users" ), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { try { // Update status to building with progress tracking - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "BUILDING", progress: { @@ -866,13 +891,13 @@ export const executeDeploymentInternal = internalAction({ currentStep: "docker-build", totalSteps: 5, }, - }); + } ); // Simulate building process - await new Promise(resolve => setTimeout(resolve, 2000)); + await new Promise( resolve => setTimeout( resolve, 2000 ) ); // Update status to deploying with progress - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "DEPLOYING", progress: { @@ -884,13 +909,13 @@ export const executeDeploymentInternal = internalAction({ }, ecrRepositoryUri: "123456789012.dkr.ecr.us-east-1.amazonaws.com/agent-repo", cloudFormationStackId: "arn:aws:cloudformation:us-east-1:123456789012:stack/agent-stack/12345", - }); + } ); // Simulate deployment process - await new Promise(resolve => setTimeout(resolve, 3000)); + await new Promise( resolve => setTimeout( resolve, 3000 ) ); // Update status to completed with final progress - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "ACTIVE", progress: { @@ -903,11 +928,11 @@ export const executeDeploymentInternal = internalAction({ agentCoreRuntimeId: "agent-runtime-12345", agentCoreEndpoint: "https://agent-12345.execute-api.us-east-1.amazonaws.com/prod", s3BucketName: "agent-deployments-12345", - }); + } ); - } catch (error: any) { + } catch ( error: any ) { // Update status to failed with error details - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "FAILED", progress: { @@ -916,10 +941,10 @@ export const executeDeploymentInternal = internalAction({ message: `Deployment failed: ${error.message}`, }, error: error.message, - }); + } ); } }, -}); +} ); // ============================================================================ // TIER DEPLOYMENT FUNCTIONS @@ -928,24 +953,24 @@ export const executeDeploymentInternal = internalAction({ /** * Tier 1: Deploy to YOUR Fargate (Freemium) */ -async function deployTier1(ctx: any, args: any, userId: string): Promise { +async function deployTier1( ctx: any, args: any, userId: string ): Promise { // Create deployment record - const deploymentId: any = await ctx.runMutation(internal.awsDeployment.createDeploymentInternal, { + const deploymentId: any = await ctx.runMutation( internal.awsDeployment.createDeploymentInternal, { agentId: args.agentId, userId, tier: "freemium", deploymentConfig: args.deploymentConfig, - }); + } ); - // Increment usage counter - await ctx.runMutation(internal.awsDeployment.incrementUsageInternal, { userId }); + // Increment usage counter (centralized in stripeMutations.ts) + await ctx.runMutation( internalStripeMutations.incrementUsageAndReportOverage, { userId } ); // Start deployment - await ctx.scheduler.runAfter(0, internal.awsDeployment.executeDeploymentInternal, { + await ctx.scheduler.runAfter( 0, internal.awsDeployment.executeDeploymentInternal, { deploymentId, agentId: args.agentId, userId, - }); + } ); return { deploymentId, @@ -958,30 +983,30 @@ async function deployTier1(ctx: any, args: any, userId: string): Promise { /** * Tier 2: Deploy to USER's Fargate (Personal AWS Account) using Web Identity Federation */ -async function deployTier2(ctx: any, args: any, userId: string): Promise { +async function deployTier2( ctx: any, args: any, userId: string ): Promise { // Get user's stored Role ARN - const user = await ctx.runQuery(internal.awsDeployment.getUserTierInternal, { userId }); + const user = await ctx.runQuery( internal.awsDeployment.getUserTierInternal, { userId } ); - if (!user || !user.awsRoleArn) { - throw new Error("No AWS Role ARN configured. Please configure your IAM role in settings."); + if ( !user || !user.awsRoleArn ) { + throw new Error( "No AWS Role ARN configured. Please configure your IAM role in settings." ); } // Create deployment record - const deploymentId: any = await ctx.runMutation(internal.awsDeployment.createDeploymentInternal, { + const deploymentId: any = await ctx.runMutation( internal.awsDeployment.createDeploymentInternal, { agentId: args.agentId, userId, tier: "personal", deploymentConfig: args.deploymentConfig, - }); + } ); // Start deployment with web identity federation - await ctx.scheduler.runAfter(0, internal.awsDeployment.executeWebIdentityDeploymentInternal, { + await ctx.scheduler.runAfter( 0, internal.awsDeployment.executeWebIdentityDeploymentInternal, { deploymentId, agentId: args.agentId, userId, roleArn: user.awsRoleArn, region: args.deploymentConfig.region, - }); + } ); return { deploymentId, @@ -994,58 +1019,53 @@ async function deployTier2(ctx: any, args: any, userId: string): Promise { /** * Get user tier (internal) */ -export const getUserTierInternal = internalQuery({ - args: { userId: v.id("users") }, - handler: async (ctx, args) => { - return await ctx.db.get(args.userId); +export const getUserTierInternal = internalQuery( { + args: { userId: v.id( "users" ) }, + handler: async ( ctx, args ) => { + return await ctx.db.get( args.userId ); }, -}); +} ); /** * Get user AWS account (internal) */ -export const getUserAWSAccountInternal = internalQuery({ - args: { userId: v.id("users") }, - handler: async (ctx, args) => { +export const getUserAWSAccountInternal = internalQuery( { + args: { userId: v.id( "users" ) }, + handler: async ( ctx, args ) => { return await ctx.db - .query("userAWSAccounts") - .withIndex("by_user_id", (q) => q.eq("userId", args.userId)) + .query( "userAWSAccounts" ) + .withIndex( "by_user_id", ( q ) => q.eq( "userId", args.userId ) ) .first(); }, -}); +} ); /** - * Increment usage counter (internal) + * Increment usage counter — delegates to shared helper in stripeMutations.ts + * (single source of truth for usage + overage logic). */ -export const incrementUsageInternal = internalMutation({ - args: { userId: v.id("users") }, - handler: async (ctx, args) => { - const user = await ctx.db.get(args.userId); - - if (!user) return; - - await ctx.db.patch(args.userId, { - testsThisMonth: (user.testsThisMonth || 0) + 1, - }); +export const incrementUsageInternal = internalMutation( { + args: { userId: v.id( "users" ) }, + handler: async ( ctx, args ) => { + await incrementUsageAndReportOverageImpl( ctx, args.userId ); }, -}); +} ); /** * Execute cross-account deployment (internal) */ -export const executeCrossAccountDeploymentInternal = internalAction({ +export const executeCrossAccountDeploymentInternal = internalAction( { args: { - deploymentId: v.id("deployments"), - agentId: v.id("agents"), + deploymentId: v.id( "deployments" ), + agentId: v.id( "agents" ), userId: v.string(), roleArn: v.string(), externalId: v.string(), region: v.string(), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { try { // Update status - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "BUILDING", progress: { @@ -1055,13 +1075,13 @@ export const executeCrossAccountDeploymentInternal = internalAction({ currentStep: "assume-role", totalSteps: 5, }, - }); + } ); // Assume role in user's account - await assumeUserRole(args.roleArn, args.externalId); + await assumeUserRole( args.roleArn, args.externalId ); // Deploy to their Fargate - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "DEPLOYING", progress: { @@ -1071,13 +1091,13 @@ export const executeCrossAccountDeploymentInternal = internalAction({ currentStep: "deploy-fargate", totalSteps: 5, }, - }); + } ); // Simulate deployment - await new Promise(resolve => setTimeout(resolve, 3000)); + await new Promise( resolve => setTimeout( resolve, 3000 ) ); // Complete - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "ACTIVE", progress: { @@ -1087,10 +1107,10 @@ export const executeCrossAccountDeploymentInternal = internalAction({ currentStep: "completed", totalSteps: 5, }, - }); + } ); - } catch (error: any) { - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + } catch ( error: any ) { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "FAILED", progress: { @@ -1099,26 +1119,26 @@ export const executeCrossAccountDeploymentInternal = internalAction({ message: `Deployment failed: ${error.message}`, }, error: error.message, - }); + } ); } }, -}); +} ); /** * Execute deployment using Web Identity Federation * Gets temporary credentials via AssumeRoleWithWebIdentity and deploys to user's AWS */ -export const executeWebIdentityDeploymentInternal = internalAction({ +export const executeWebIdentityDeploymentInternal = internalAction( { args: { - deploymentId: v.id("deployments"), - agentId: v.id("agents"), + deploymentId: v.id( "deployments" ), + agentId: v.id( "agents" ), userId: v.string(), roleArn: v.string(), region: v.string(), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { try { - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "BUILDING", progress: { @@ -1128,14 +1148,14 @@ export const executeWebIdentityDeploymentInternal = internalAction({ currentStep: "authenticate", totalSteps: 5, }, - }); + } ); - const assumeRoleResult = await ctx.runAction(api.awsAuth.assumeRoleWithWebIdentity, { + const assumeRoleResult = await ctx.runAction( api.awsAuth.assumeRoleWithWebIdentity, { roleArn: args.roleArn, - }); + } ); - if (!assumeRoleResult.success || !assumeRoleResult.credentials) { - throw new Error(assumeRoleResult.error || "Failed to assume role with web identity"); + if ( !assumeRoleResult.success || !assumeRoleResult.credentials ) { + throw new Error( assumeRoleResult.error || "Failed to assume role with web identity" ); } const region = args.region; @@ -1146,13 +1166,13 @@ export const executeWebIdentityDeploymentInternal = internalAction({ sessionToken: tempCredentials.sessionToken, }; - const { STSClient, GetCallerIdentityCommand } = await import("@aws-sdk/client-sts"); - const stsClient = new STSClient({ region, credentials: awsCredentials }); - const identity = await stsClient.send(new GetCallerIdentityCommand({})); + const { STSClient, GetCallerIdentityCommand } = await import( "@aws-sdk/client-sts" ); + const stsClient = new STSClient( { region, credentials: awsCredentials } ); + const identity = await stsClient.send( new GetCallerIdentityCommand( {} ) ); const awsAccountId = identity.Account || "unknown"; const callerArn = identity.Arn || ""; - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "BUILDING", progress: { @@ -1162,28 +1182,28 @@ export const executeWebIdentityDeploymentInternal = internalAction({ currentStep: "package-artifacts", totalSteps: 5, }, - }); + } ); - const agent = await ctx.runQuery(internal.agents.getInternal, { id: args.agentId }); - if (!agent) { - throw new Error("Agent not found"); + const agent = await ctx.runQuery( internal.agents.getInternal, { id: args.agentId } ); + if ( !agent ) { + throw new Error( "Agent not found" ); } - const { files } = assembleDeploymentPackageFiles(agent, { + const { files } = assembleDeploymentPackageFiles( agent, { deploymentTarget: agent.deploymentType === "aws" ? "agentcore" : agent.deploymentType, includeCloudFormation: true, includeCLIScript: true, includeLambdaConfig: agent.deploymentType === "lambda", - }); + } ); - const JSZipModule = await import("jszip"); + const JSZipModule = await import( "jszip" ); const zip = new JSZipModule.default(); - for (const [filename, content] of Object.entries(files)) { - zip.file(filename, content); + for ( const [filename, content] of Object.entries( files ) ) { + zip.file( filename, content ); } - const zipBuffer: Buffer = await zip.generateAsync({ type: "nodebuffer" }); + const zipBuffer: Buffer = await zip.generateAsync( { type: "nodebuffer" } ); - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "BUILDING", progress: { @@ -1193,50 +1213,50 @@ export const executeWebIdentityDeploymentInternal = internalAction({ currentStep: "package-artifacts", totalSteps: 5, }, - }); + } ); - const sanitizedName = sanitizeAgentName(agent.name || `agent-${args.agentId}`); + const sanitizedName = sanitizeAgentName( agent.name || `agent-${args.agentId}` ); const packageKey = `agentcore/${sanitizedName}/${args.deploymentId}-${Date.now()}.zip`; const baseBucketName = `agent-builder-${awsAccountId}-deployments`; let bucketName = baseBucketName; - const { S3Client, CreateBucketCommand, HeadBucketCommand, PutObjectCommand, GetObjectCommand } = await import("@aws-sdk/client-s3"); - const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner"); - const s3Client = new S3Client({ region, credentials: awsCredentials }); + const { S3Client, CreateBucketCommand, HeadBucketCommand, PutObjectCommand, GetObjectCommand } = await import( "@aws-sdk/client-s3" ); + const { getSignedUrl } = await import( "@aws-sdk/s3-request-presigner" ); + const s3Client = new S3Client( { region, credentials: awsCredentials } ); try { - await s3Client.send(new HeadBucketCommand({ Bucket: bucketName })); - } catch (headError: any) { + await s3Client.send( new HeadBucketCommand( { Bucket: bucketName } ) ); + } catch ( headError: any ) { try { const createParams: any = { Bucket: bucketName }; - if (region !== "us-east-1") { + if ( region !== "us-east-1" ) { createParams.CreateBucketConfiguration = { LocationConstraint: region }; } - await s3Client.send(new CreateBucketCommand(createParams)); - } catch (createError: any) { - if (createError.name === "BucketAlreadyOwnedByYou") { + await s3Client.send( new CreateBucketCommand( createParams ) ); + } catch ( createError: any ) { + if ( createError.name === "BucketAlreadyOwnedByYou" ) { // bucket already accessible - } else if (createError.name === "BucketAlreadyExists") { + } else if ( createError.name === "BucketAlreadyExists" ) { bucketName = `${baseBucketName}-${Date.now()}`; const createParams: any = { Bucket: bucketName }; - if (region !== "us-east-1") { + if ( region !== "us-east-1" ) { createParams.CreateBucketConfiguration = { LocationConstraint: region }; } - await s3Client.send(new CreateBucketCommand(createParams)); + await s3Client.send( new CreateBucketCommand( createParams ) ); } else { throw createError; } } } - await s3Client.send(new PutObjectCommand({ + await s3Client.send( new PutObjectCommand( { Bucket: bucketName, Key: packageKey, Body: zipBuffer, ContentType: "application/zip", - })); + } ) ); - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "BUILDING", progress: { @@ -1250,26 +1270,26 @@ export const executeWebIdentityDeploymentInternal = internalAction({ deploymentPackageKey: packageKey, awsAccountId, awsCallerArn: callerArn, - }); + } ); - const { ECRClient, DescribeRepositoriesCommand, CreateRepositoryCommand } = await import("@aws-sdk/client-ecr"); - const ecrClient = new ECRClient({ region, credentials: awsCredentials }); + const { ECRClient, DescribeRepositoriesCommand, CreateRepositoryCommand } = await import( "@aws-sdk/client-ecr" ); + const ecrClient = new ECRClient( { region, credentials: awsCredentials } ); const repositoryName = `agent-builder/${sanitizedName}`; let repositoryUri: string | undefined; try { - const describe = await ecrClient.send(new DescribeRepositoriesCommand({ repositoryNames: [repositoryName] })); + const describe = await ecrClient.send( new DescribeRepositoriesCommand( { repositoryNames: [repositoryName] } ) ); repositoryUri = describe.repositories?.[0]?.repositoryUri; - } catch (repoError: any) { - if (repoError.name === "RepositoryNotFoundException") { - const created = await ecrClient.send(new CreateRepositoryCommand({ repositoryName })); + } catch ( repoError: any ) { + if ( repoError.name === "RepositoryNotFoundException" ) { + const created = await ecrClient.send( new CreateRepositoryCommand( { repositoryName } ) ); repositoryUri = created.repository?.repositoryUri; } else { throw repoError; } } - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "DEPLOYING", progress: { @@ -1282,17 +1302,17 @@ export const executeWebIdentityDeploymentInternal = internalAction({ totalSteps: 5, }, ecrRepositoryUri: repositoryUri, - }); + } ); let downloadUrl: string | null = null; try { downloadUrl = await getSignedUrl( s3Client, - new GetObjectCommand({ Bucket: bucketName, Key: packageKey }), + new GetObjectCommand( { Bucket: bucketName, Key: packageKey } ), { expiresIn: 3600 } ); - } catch (presignError) { - console.warn("Unable to create presigned URL for deployment package", presignError); + } catch ( presignError ) { + console.warn( "Unable to create presigned URL for deployment package", presignError ); } const instructionsLines = [ @@ -1304,13 +1324,13 @@ export const executeWebIdentityDeploymentInternal = internalAction({ "3. Deploy the AgentCore stack using the CloudFormation template inside agent_package.zip or run deploy_agentcore.sh.", ]; - if (downloadUrl) { - instructionsLines.push(`Temporary download URL (valid 1 hour): ${downloadUrl}`); + if ( downloadUrl ) { + instructionsLines.push( `Temporary download URL (valid 1 hour): ${downloadUrl}` ); } - const instructions = instructionsLines.join("\n\n"); + const instructions = instructionsLines.join( "\n\n" ); - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "ACTIVE", progress: { @@ -1333,11 +1353,11 @@ export const executeWebIdentityDeploymentInternal = internalAction({ source: "deployment", }, ], - }); + } ); - } catch (error: any) { - console.error("Web identity deployment error:", error); - await ctx.runMutation(internal.awsDeployment.updateDeploymentStatusInternal, { + } catch ( error: any ) { + console.error( "Web identity deployment error:", error ); + await ctx.runMutation( internal.awsDeployment.updateDeploymentStatusInternal, { deploymentId: args.deploymentId, status: "FAILED", progress: { @@ -1346,15 +1366,15 @@ export const executeWebIdentityDeploymentInternal = internalAction({ message: `Deployment failed: ${error.message}`, }, error: error.message, - }); + } ); } }, -}); +} ); /** * Assume role in user's AWS account (DEPRECATED - use web identity instead) */ -async function assumeUserRole(roleArn: string, externalId: string) { +async function assumeUserRole( roleArn: string, externalId: string ) { const response = await fetch( `${process.env.CONVEX_SITE_URL}/aws/assumeRole`, { @@ -1363,17 +1383,17 @@ async function assumeUserRole(roleArn: string, externalId: string) { "Content-Type": "application/json", Authorization: `Bearer ${process.env.AWS_API_SECRET}`, }, - body: JSON.stringify({ + body: JSON.stringify( { roleArn, externalId, sessionName: `agent-deployment-${Date.now()}`, durationSeconds: 3600, - }), + } ), } ); - if (!response.ok) { - throw new Error(`Failed to assume role: ${response.statusText}`); + if ( !response.ok ) { + throw new Error( `Failed to assume role: ${response.statusText}` ); } return await response.json(); @@ -1394,20 +1414,20 @@ async function deployToUserAWS( ECRClient, CreateRepositoryCommand, GetAuthorizationTokenCommand - } = await import("@aws-sdk/client-ecr"); + } = await import( "@aws-sdk/client-ecr" ); const { ECSClient, CreateClusterCommand, RegisterTaskDefinitionCommand, CreateServiceCommand - } = await import("@aws-sdk/client-ecs"); + } = await import( "@aws-sdk/client-ecs" ); const { S3Client, CreateBucketCommand, PutObjectCommand - } = await import("@aws-sdk/client-s3"); + } = await import( "@aws-sdk/client-s3" ); const { EC2Client, @@ -1416,7 +1436,7 @@ async function deployToUserAWS( CreateSecurityGroupCommand, AuthorizeSecurityGroupIngressCommand, DescribeSecurityGroupsCommand - } = await import("@aws-sdk/client-ec2"); + } = await import( "@aws-sdk/client-ec2" ); // Configure AWS clients with temporary credentials const credentials = { @@ -1425,88 +1445,88 @@ async function deployToUserAWS( sessionToken }; - const ecrClient = new ECRClient({ region, credentials }); - const ecsClient = new ECSClient({ region, credentials }); - const s3Client = new S3Client({ region, credentials }); - const ec2Client = new EC2Client({ region, credentials }); + const ecrClient = new ECRClient( { region, credentials } ); + const ecsClient = new ECSClient( { region, credentials } ); + const s3Client = new S3Client( { region, credentials } ); + const ec2Client = new EC2Client( { region, credentials } ); // 1. Create ECR repository for agent image const repoName = `agent-${agent._id.toLowerCase()}`; try { - await ecrClient.send(new CreateRepositoryCommand({ + await ecrClient.send( new CreateRepositoryCommand( { repositoryName: repoName, imageScanningConfiguration: { scanOnPush: true } - })); - console.log(`Created ECR repository: ${repoName}`); - } catch (error: any) { - if (error.name !== "RepositoryAlreadyExistsException") { + } ) ); + console.log( `Created ECR repository: ${repoName}` ); + } catch ( error: any ) { + if ( error.name !== "RepositoryAlreadyExistsException" ) { throw error; } - console.log(`ECR repository already exists: ${repoName}`); + console.log( `ECR repository already exists: ${repoName}` ); } // 2. Get ECR auth token for Docker push - const authResponse = await ecrClient.send(new GetAuthorizationTokenCommand({})); + const authResponse = await ecrClient.send( new GetAuthorizationTokenCommand( {} ) ); const authToken = authResponse.authorizationData?.[0]; - if (!authToken) { - throw new Error("Failed to get ECR authorization token"); + if ( !authToken ) { + throw new Error( "Failed to get ECR authorization token" ); } // 3. Create S3 bucket for agent artifacts const bucketName = `agent-artifacts-${Date.now()}`; try { - await s3Client.send(new CreateBucketCommand({ + await s3Client.send( new CreateBucketCommand( { Bucket: bucketName, CreateBucketConfiguration: { - LocationConstraint: (region !== "us-east-1" ? region : undefined) as any + LocationConstraint: ( region !== "us-east-1" ? region : undefined ) as any } - })); - console.log(`Created S3 bucket: ${bucketName}`); - } catch (error: any) { - if (error.name !== "BucketAlreadyOwnedByYou") { + } ) ); + console.log( `Created S3 bucket: ${bucketName}` ); + } catch ( error: any ) { + if ( error.name !== "BucketAlreadyOwnedByYou" ) { throw error; } } // 4. Upload agent code to S3 - const agentCode = generateAgentCoreCode(agent); - await s3Client.send(new PutObjectCommand({ + const agentCode = generateAgentCoreCode( agent ); + await s3Client.send( new PutObjectCommand( { Bucket: bucketName, Key: "agent.py", Body: agentCode, ContentType: "text/x-python" - })); + } ) ); // 5. Create ECS cluster const clusterName = `agent-cluster-${agent._id}`; try { - await ecsClient.send(new CreateClusterCommand({ + await ecsClient.send( new CreateClusterCommand( { clusterName, capacityProviders: ["FARGATE"], defaultCapacityProviderStrategy: [{ capacityProvider: "FARGATE", weight: 1 }] - })); - console.log(`Created ECS cluster: ${clusterName}`); - } catch (error: any) { - if (error.name !== "ClusterAlreadyExistsException") { + } ) ); + console.log( `Created ECS cluster: ${clusterName}` ); + } catch ( error: any ) { + if ( error.name !== "ClusterAlreadyExistsException" ) { throw error; } } // 6. Register task definition const taskFamily = `agent-task-${agent._id}`; - const taskDefResponse = await ecsClient.send(new RegisterTaskDefinitionCommand({ + const taskDefResponse = await ecsClient.send( new RegisterTaskDefinitionCommand( { family: taskFamily, networkMode: "awsvpc", requiresCompatibilities: ["FARGATE"], cpu: "256", memory: "512", - executionRoleArn: `arn:aws:iam::${authToken.proxyEndpoint?.split('.')[0].split('//')[1]}:role/ecsTaskExecutionRole`, + executionRoleArn: `arn:aws:iam::${authToken.proxyEndpoint?.split( '.' )[0].split( '//' )[1]}:role/ecsTaskExecutionRole`, containerDefinitions: [{ name: "agent-container", image: `${authToken.proxyEndpoint}/${repoName}:latest`, @@ -1528,14 +1548,14 @@ async function deployToUserAWS( } } }] - })); + } ) ); - console.log(`Registered task definition: ${taskDefResponse.taskDefinition?.taskDefinitionArn}`); + console.log( `Registered task definition: ${taskDefResponse.taskDefinition?.taskDefinitionArn}` ); // 7. Create ECS service const serviceName = `agent-service-${agent._id}`; try { - await ecsClient.send(new CreateServiceCommand({ + await ecsClient.send( new CreateServiceCommand( { cluster: clusterName, serviceName, taskDefinition: taskDefResponse.taskDefinition?.taskDefinitionArn, @@ -1548,10 +1568,10 @@ async function deployToUserAWS( securityGroups: [] // TODO: Create security group } } - })); - console.log(`Created ECS service: ${serviceName}`); - } catch (error: any) { - if (error.name !== "ServiceAlreadyExistsException") { + } ) ); + console.log( `Created ECS service: ${serviceName}` ); + } catch ( error: any ) { + if ( error.name !== "ServiceAlreadyExistsException" ) { throw error; } } @@ -1564,4 +1584,3 @@ async function deployToUserAWS( taskDefinition: taskDefResponse.taskDefinition?.taskDefinitionArn }; } - diff --git a/convex/codeGenerator.ts b/convex/codeGenerator.ts index 724484d..f072e4b 100644 --- a/convex/codeGenerator.ts +++ b/convex/codeGenerator.ts @@ -127,7 +127,7 @@ function generateImports(tools: any[], deploymentType: string, modelId?: string) imports.push(""); imports.push("# Tool imports"); - imports.push("from strandsagents.tools import ("); + imports.push("from strands_tools import ("); // Add tool imports from strands-agents-tools const toolImports = new Set(); @@ -312,17 +312,34 @@ function generateImports(tools: any[], deploymentType: string, modelId?: string) return imports.join("\n"); } -function generateToolConfigs(tools: any[]): string { +function generateToolConfigs(tools: any[], linkedAgents?: Array<{agentId: string, agentName: string, description: string}>): string { + const toolCode: string[] = []; + + // Generate agent-as-tool wrappers for linked agents + if (linkedAgents && linkedAgents.length > 0) { + toolCode.push("# Agent-as-Tool Wrappers"); + toolCode.push("# These agents can be invoked as tools for hierarchical coordination\n"); + linkedAgents.forEach(agent => { + toolCode.push(generateAgentToolWrapper(agent.agentId, agent.agentName, agent.description)); + }); + toolCode.push(""); + } + // Generate custom tool functions with @tool decorator const customTools = tools .filter(tool => !isBuiltInTool(tool.type)) .map(tool => generateCustomToolFunction(tool)); - if (customTools.length === 0) { + if (customTools.length > 0) { + toolCode.push("# Custom Tools"); + toolCode.push(...customTools); + } + + if (toolCode.length === 0) { return "# All tools are built-in from strands-agents-tools"; } - return customTools.join("\n\n"); + return toolCode.join("\n\n"); } /** @@ -353,6 +370,44 @@ function isBuiltInTool(toolType: string): boolean { return builtInTools.includes(toolType); } +/** + * Generate agent-as-tool wrapper + */ +function generateAgentToolWrapper(agentId: string, agentName: string, description: string): string { + const toolName = agentName.replace(/[^a-zA-Z0-9_]/g, '_').toLowerCase(); + + return `@tool( + name="${toolName}", + description="${description || `Invoke ${agentName} agent`}", + parameters={ + "task": { + "type": "string", + "description": "Task or question for ${agentName}", + "required": True + } + } +) +async def ${toolName}(task: str) -> str: + """ + Invoke ${agentName} agent as a tool. + Enables hierarchical agent coordination. + """ + import os + import requests + + api_url = os.getenv("PLATFORM_API_URL", "https://api.mikepfunk.com") + + response = requests.post( + f"{api_url}/execute-agent", + json={"agentId": "${agentId}", "message": task}, + headers={"Authorization": f"Bearer {os.getenv('PLATFORM_API_KEY')}"}, + timeout=300 + ) + + result = response.json() + return result.get("content", "") if result.get("success") else f"Error: {result.get('error')}"`; +} + /** * Generate a custom tool function with @tool decorator */ diff --git a/convex/containerOrchestrator.ts b/convex/containerOrchestrator.ts index 21634ce..8154e6d 100644 --- a/convex/containerOrchestrator.ts +++ b/convex/containerOrchestrator.ts @@ -128,10 +128,10 @@ export const startTestContainer = internalAction({ console.log(`✅ ECS task started: ${taskArn}`); console.log(`📊 Log stream: ${logGroup}/${logStream}`); - // REMOVED: Log polling causes excessive operations - // Instead, logs should be fetched on-demand when user views them - // Or use CloudWatch Events to push log updates - + // NOTE: Usage is tracked on completion (not start) to avoid double-counting. + // submitTest already increments executionsThisMonth for cloud models. + // incrementUserUsage is called when "TEST COMPLETED SUCCESSFULLY" is detected in logs. + // Schedule timeout handler (single timeout, not recurring - OK) await ctx.scheduler.runAfter(args.timeout, internal.containerOrchestrator.handleTimeout, { testId: args.testId, @@ -207,6 +207,17 @@ export const fetchLogs = internalAction({ // Check for completion markers in logs const logsText = newLogs.join("\n"); if (logsText.includes("TEST COMPLETED SUCCESSFULLY")) { + // Get test details for usage tracking + const testDetails = await ctx.runQuery(internal.testExecution.getTestByIdInternal, { testId: args.testId }); + if (testDetails) { + // TRACK USAGE: On successful completion + await ctx.runMutation(internal.testExecution.incrementUserUsage, { + userId: testDetails.userId, + testId: args.testId, + executionMethod: "fargate", + }); + } + await ctx.runMutation(internal.testExecution.updateStatus, { testId: args.testId, status: "COMPLETED", diff --git a/convex/conversationAnalysis.ts b/convex/conversationAnalysis.ts new file mode 100644 index 0000000..ab3c997 --- /dev/null +++ b/convex/conversationAnalysis.ts @@ -0,0 +1,341 @@ +/** + * Conversation Analysis for Agent Improvement + * + * Analyzes chat conversations to identify: + * - Agent successes and failures + * - Missing capabilities + * - Performance bottlenecks + * - User frustrations and corrections + * - Suggested improvements + */ + +import { v } from "convex/values"; +import { mutation, query, action, internalMutation, internalQuery } from "./_generated/server"; +import { api, internal } from "./_generated/api"; +import { Id } from "./_generated/dataModel"; + +/** + * Analyze a conversation to identify improvement opportunities + */ +export const analyzeConversation = action({ + args: { + conversationId: v.id("conversations"), + }, + handler: async (ctx: any, args: { conversationId: Id<"conversations"> }): Promise<{ analysisId: string; analysis: any }> => { + // Get conversation data + const conversation: any = await ctx.runQuery(api.conversations.get, { conversationId: args.conversationId }); + + if (!conversation) { + throw new Error("Conversation not found"); + } + + // Get agent data + const agent: any = await ctx.runQuery(api.agents.get, { id: conversation.agentId }); + + if (!agent) { + throw new Error("Agent not found"); + } + + // Analyze conversation using AI + const analysis = await analyzeWithAI(conversation.messages, agent); + + // Store analysis + const analysisId: string = await ctx.runMutation(internal.conversationAnalysis.storeAnalysis, { + conversationId: args.conversationId, + agentId: conversation.agentId, + analysis, + }); + + return { + analysisId, + analysis, + }; + }, +}); + +/** + * AI-powered conversation analysis + */ +async function analyzeWithAI(messages: any[], agent: any) { + // Extract patterns + const userMessages = messages.filter(m => m.role === "user"); + const assistantMessages = messages.filter(m => m.role === "assistant"); + + const analysis = { + conversationMetrics: { + totalMessages: messages.length, + userTurns: userMessages.length, + assistantTurns: assistantMessages.length, + avgResponseLength: assistantMessages.length > 0 + ? assistantMessages.reduce((acc, m) => acc + m.content.length, 0) / assistantMessages.length + : 0, + }, + + identifiedIssues: [] as Array<{ + type: 'error' | 'misunderstanding' | 'missing_tool' | 'incorrect_response' | 'timeout'; + message: string; + userMessage: string; + agentResponse: string; + severity: 'high' | 'medium' | 'low'; + timestamp: number; + }>, + + successfulInteractions: [] as Array<{ + userMessage: string; + agentResponse: string; + timestamp: number; + }>, + + userCorrections: [] as Array<{ + originalMessage: string; + correctionMessage: string; + timestamp: number; + }>, + + missingCapabilities: [] as string[], + + suggestedImprovements: [] as Array<{ + type: 'add_tool' | 'modify_prompt' | 'change_model' | 'add_memory' | 'improve_error_handling'; + description: string; + priority: 'high' | 'medium' | 'low'; + implementation: string; + }>, + + performanceIssues: [] as Array<{ + issue: string; + impact: string; + recommendation: string; + }>, + }; + + // Analyze messages for issues + for (let i = 0; i < messages.length; i++) { + const msg = messages[i]; + const nextMsg = messages[i + 1]; + + if (msg.role === "user" && nextMsg?.role === "assistant") { + const userContent = msg.content.toLowerCase(); + const assistantContent = nextMsg.content.toLowerCase(); + + // Detect errors + if (assistantContent.includes("error") || assistantContent.includes("failed") || assistantContent.includes("cannot")) { + analysis.identifiedIssues.push({ + type: 'error', + message: "Agent reported an error", + userMessage: msg.content, + agentResponse: nextMsg.content, + severity: 'high', + timestamp: msg.timestamp, + }); + } + + // Detect misunderstandings + if (userContent.includes("no") || userContent.includes("that's not what i meant") || userContent.includes("try again")) { + analysis.identifiedIssues.push({ + type: 'misunderstanding', + message: "User indicated misunderstanding", + userMessage: msg.content, + agentResponse: nextMsg.content, + severity: 'medium', + timestamp: msg.timestamp, + }); + + // This is likely a correction + if (i > 0) { + analysis.userCorrections.push({ + originalMessage: messages[i - 2]?.content || "", + correctionMessage: msg.content, + timestamp: msg.timestamp, + }); + } + } + + // Detect missing capabilities + if (assistantContent.includes("i don't have") || assistantContent.includes("i cannot") || assistantContent.includes("unable to")) { + const capabilityMatch = assistantContent.match(/(?:don't have|cannot|unable to)\s+([^.]+)/); + if (capabilityMatch) { + analysis.missingCapabilities.push(capabilityMatch[1].trim()); + } + } + + // Detect successful interactions (positive user feedback) + if (i < messages.length - 2) { + const followUp = messages[i + 2]; + if (followUp?.role === "user") { + const followUpContent = followUp.content.toLowerCase(); + if ( + followUpContent.includes("thanks") || + followUpContent.includes("perfect") || + followUpContent.includes("great") || + followUpContent.includes("good") || + followUpContent.includes("exactly") + ) { + analysis.successfulInteractions.push({ + userMessage: msg.content, + agentResponse: nextMsg.content, + timestamp: msg.timestamp, + }); + } + } + } + } + } + + // Generate improvement suggestions based on analysis + if (analysis.identifiedIssues.length > 0) { + const errorCount = analysis.identifiedIssues.filter(i => i.type === 'error').length; + if (errorCount > 2) { + analysis.suggestedImprovements.push({ + type: 'improve_error_handling', + description: `Agent encountered ${errorCount} errors. Improve error handling and validation.`, + priority: 'high', + implementation: `Add try-catch blocks and user-friendly error messages. Consider adding retry logic for failed operations.`, + }); + } + } + + if (analysis.missingCapabilities.length > 0) { + analysis.missingCapabilities.forEach(cap => { + analysis.suggestedImprovements.push({ + type: 'add_tool', + description: `Add capability: ${cap}`, + priority: 'high', + implementation: `Research and add appropriate tool or function to handle: ${cap}`, + }); + }); + } + + if (analysis.userCorrections.length > 2) { + analysis.suggestedImprovements.push({ + type: 'modify_prompt', + description: `Multiple user corrections detected (${analysis.userCorrections.length}). Agent may need clearer instructions.`, + priority: 'medium', + implementation: `Review system prompt and add more specific instructions about expected behavior.`, + }); + } + + // Check for performance issues + if (analysis.conversationMetrics.avgResponseLength > 1000) { + analysis.performanceIssues.push({ + issue: "Responses are very long (avg " + Math.round(analysis.conversationMetrics.avgResponseLength) + " chars)", + impact: "May frustrate users who want concise answers", + recommendation: "Add instruction to be more concise, or allow users to request detailed explanations", + }); + } + + return analysis; +} + +/** + * Store conversation analysis (internal mutation) + */ +export const storeAnalysis = internalMutation({ + args: { + conversationId: v.id("conversations"), + agentId: v.id("agents"), + analysis: v.any(), + }, + handler: async (ctx, { conversationId, agentId, analysis }) => { + const analysisId = await ctx.db.insert("conversationAnalyses", { + conversationId, + agentId, + analysis, + createdAt: Date.now(), + }); + + return analysisId; + }, +}); + +/** + * Get analysis for a conversation (internal query) + */ +export const getAnalysis = internalQuery({ + args: { + conversationId: v.id("conversations"), + }, + handler: async (ctx, { conversationId }) => { + return await ctx.db + .query("conversationAnalyses") + .withIndex("by_conversation", (q) => q.eq("conversationId", conversationId)) + .order("desc") + .first(); + }, +}); + +/** + * Get all analyses for an agent (internal query) + */ +export const getAgentAnalyses = internalQuery({ + args: { + agentId: v.id("agents"), + }, + handler: async (ctx, { agentId }) => { + return await ctx.db + .query("conversationAnalyses") + .withIndex("by_agent", (q) => q.eq("agentId", agentId)) + .order("desc") + .collect(); + }, +}); + +/** + * Generate agent improvement plan based on conversation + */ +export const generateImprovementPlan = action({ + args: { + conversationId: v.id("conversations"), + }, + handler: async (ctx: any, args: { conversationId: Id<"conversations"> }): Promise => { + // First analyze the conversation + const result: any = await ctx.runAction(api.conversationAnalysis.analyzeConversation, { + conversationId: args.conversationId, + }); + const analysis: any = result.analysis; + + // Generate comprehensive improvement plan + const improvementPlan: any = { + summary: generateSummary(analysis), + criticalIssues: analysis.identifiedIssues.filter((i: any) => i.severity === 'high'), + recommendedChanges: analysis.suggestedImprovements, + estimatedImpact: calculateImpact(analysis), + implementationSteps: generateImplementationSteps(analysis.suggestedImprovements), + }; + + return improvementPlan; + }, +}); + +function generateSummary(analysis: any): string { + const issueCount = analysis.identifiedIssues.length; + const successCount = analysis.successfulInteractions.length; + const correctionCount = analysis.userCorrections.length; + + return `Conversation Analysis: ${successCount} successful interactions, ${issueCount} issues detected, ${correctionCount} user corrections needed. ${analysis.suggestedImprovements.length} improvements suggested.`; +} + +function calculateImpact(analysis: any): { + currentScore: number; + projectedScore: number; + improvement: string; +} { + const total = analysis.conversationMetrics.totalMessages; + const issues = analysis.identifiedIssues.length; + const successes = analysis.successfulInteractions.length; + + const denom = successes + issues; + const currentScore = denom === 0 ? 0 : Math.max(0, Math.min(100, (successes / denom) * 100)); + const projectedScore = Math.min(100, currentScore + (analysis.suggestedImprovements.length * 10)); + + return { + currentScore: Math.round(currentScore), + projectedScore: Math.round(projectedScore), + improvement: `+${Math.round(projectedScore - currentScore)}%`, + }; +} + +function generateImplementationSteps(improvements: any[]): string[] { + return improvements.map((imp, idx) => { + return `${idx + 1}. [${imp.priority.toUpperCase()}] ${imp.description} - ${imp.implementation}`; + }); +} diff --git a/convex/deploymentRouter.ts b/convex/deploymentRouter.ts index e62b67e..227cc39 100644 --- a/convex/deploymentRouter.ts +++ b/convex/deploymentRouter.ts @@ -7,7 +7,13 @@ import { v } from "convex/values"; import { mutation, query } from "./_generated/server"; import { action } from "./_generated/server"; import { getAuthUserId } from "@convex-dev/auth/server"; -import { api } from "./_generated/api"; +import { api, internal } from "./_generated/api"; + +// Stripe mutations live in stripeMutations.ts. Cast bridges codegen gap. +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const internalStripeMutations = ( internal as any ).stripeMutations; +// Direct import for mutation handlers (mutations cannot call ctx.runMutation) +import { incrementUsageAndReportOverageImpl } from "./stripeMutations"; // Main deployment entry point - routes to correct tier export const deployAgent = action({ @@ -64,14 +70,17 @@ async function deployTier1(ctx: any, args: any, userId: any): Promise { if (!user) throw new Error("User not found"); - const testsThisMonth: number = user.testsThisMonth || 0; - const limit = 10; // Free tier limit + const executionsThisMonth: number = user.executionsThisMonth || 0; + // Use centralized tier config for limit + const { getTierConfig } = await import("./lib/tierConfig"); + const freeTierConfig = getTierConfig("freemium"); + const limit = freeTierConfig.monthlyExecutions; - if (testsThisMonth >= limit) { + if (executionsThisMonth >= limit) { return { success: false, error: "Free tier limit reached", - message: `You've used ${testsThisMonth}/${limit} free tests this month. Upgrade to deploy to your own AWS account!`, + message: `You've used ${executionsThisMonth}/${limit} free tests this month. Upgrade to Personal ($5/month) to deploy to your own AWS account!`, upgradeUrl: "/settings/aws", }; } @@ -84,6 +93,16 @@ async function deployTier1(ctx: any, args: any, userId: any): Promise { throw new Error("Agent not found"); } + // Validate model for AgentCore (Bedrock only) + if (!agent.model?.includes(".")) { + return { + success: false, + error: "Invalid model format", + message: "AgentCore requires AWS Bedrock models (e.g., anthropic.claude-sonnet-4-5). Upgrade to Personal tier for Ollama support.", + upgradeUrl: "/settings/aws", + }; + } + // Extract dependencies from agent tools const dependencies: string[] = []; for (const tool of agent.tools || []) { @@ -110,17 +129,18 @@ async function deployTier1(ctx: any, args: any, userId: any): Promise { throw new Error(result.error || "AgentCore deployment failed"); } - // Increment usage counter - await ctx.runMutation(api.deploymentRouter.incrementUsage, { + // Increment usage counter (centralized in stripeMutations.ts) + await ctx.runMutation( internalStripeMutations.incrementUsageAndReportOverage, { userId, - }); + modelId: agent.model, + } ); return { success: true, tier: "freemium", result, message: "Agent deployed to AgentCore sandbox", - upgradePrompt: `You have ${limit - testsThisMonth - 1} free tests remaining. Upgrade to deploy to your own AWS account!`, + upgradePrompt: `You have ${limit - executionsThisMonth - 1} free tests remaining. Upgrade to deploy to your own AWS account!`, }; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); @@ -182,20 +202,21 @@ async function deployTier3(_ctx: any, _args: any, _userId: string): Promise } } -// Increment usage counter for freemium users +// Increment usage counter and report overage — delegates to shared helper +// in stripeMutations.ts (single source of truth for usage + overage logic). export const incrementUsage = mutation({ args: { userId: v.id("users"), + modelId: v.optional( v.string() ), }, handler: async (ctx, args) => { - // userId is already the Convex document ID - const user = await ctx.db.get(args.userId); - - if (!user) return; - - await ctx.db.patch(args.userId, { - testsThisMonth: (user.testsThisMonth || 0) + 1, - }); + const identity = await ctx.auth.getUserIdentity(); + if ( !identity ) { + throw new Error( "Not authenticated" ); + } + await incrementUsageAndReportOverageImpl( ctx, args.userId, { + modelId: args.modelId, + } ); }, }); @@ -210,7 +231,7 @@ export const resetMonthlyUsage = mutation({ for (const user of users) { await ctx.db.patch(user._id, { - testsThisMonth: 0, + executionsThisMonth: 0, }); } diff --git a/convex/errorLogging.ts b/convex/errorLogging.ts index cacfa10..9a489fd 100644 --- a/convex/errorLogging.ts +++ b/convex/errorLogging.ts @@ -1,6 +1,6 @@ /** * Centralized Error Logging and Audit System - * + * * This module provides comprehensive error logging and audit trail functionality * for OAuth authentication, MCP operations, and agent invocations. */ @@ -27,7 +27,7 @@ export const logError = mutation({ requestId: v.optional(v.string()), })), }, - handler: async (ctx, args) => { + handler: async (ctx: any, args: any): Promise => { const errorId = await ctx.db.insert("errorLogs", { category: args.category, severity: args.severity, diff --git a/convex/guardrails.ts b/convex/guardrails.ts index fe19196..02b1fa2 100644 --- a/convex/guardrails.ts +++ b/convex/guardrails.ts @@ -75,7 +75,7 @@ export function validateMessage( const warnings: string[] = []; // Check message length - if (message.length > config.maxTokensPerMessage * 4) { // Rough token estimation + if ( message.length > config.maxTokensPerMessage * 4 ) { // Rough token estimation return { allowed: false, reason: `Message too long. Maximum ${config.maxTokensPerMessage} tokens allowed.`, @@ -85,30 +85,30 @@ export function validateMessage( // Check for blocked keywords const lowerMessage = message.toLowerCase(); - for (const keyword of config.blockedKeywords) { - if (lowerMessage.includes(keyword.toLowerCase())) { - warnings.push(`Potentially sensitive keyword detected: ${keyword}`); + for ( const keyword of config.blockedKeywords ) { + if ( lowerMessage.includes( keyword.toLowerCase() ) ) { + warnings.push( `Potentially sensitive keyword detected: ${keyword}` ); } } // Check for PII patterns - if (config.contentFilters.enablePIIDetection) { + if ( config.contentFilters.enablePIIDetection ) { const piiPatterns = [ /\b\d{3}-\d{2}-\d{4}\b/, // SSN /\b\d{16}\b/, // Credit card /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/, // Email ]; - for (const pattern of piiPatterns) { - if (pattern.test(message)) { - warnings.push("Potential PII detected in message"); + for ( const pattern of piiPatterns ) { + if ( pattern.test( message ) ) { + warnings.push( "Potential PII detected in message" ); break; } } } // Check for code injection attempts - if (config.contentFilters.enableCodeInjectionPrevention) { + if ( config.contentFilters.enableCodeInjectionPrevention ) { const dangerousPatterns = [ /eval\s*\(/, /exec\s*\(/, @@ -117,8 +117,8 @@ export function validateMessage( /os\./, ]; - for (const pattern of dangerousPatterns) { - if (pattern.test(message)) { + for ( const pattern of dangerousPatterns ) { + if ( pattern.test( message ) ) { return { allowed: false, reason: "Potential code injection detected", @@ -140,11 +140,11 @@ export function checkRateLimits( timeWindow: number, config: GuardrailConfig = DEFAULT_GUARDRAILS ): { allowed: boolean; reason?: string; resetTime?: number } { - const messagesPerHour = messageCount; - const hoursElapsed = timeWindow / (1000 * 60 * 60); + const hoursElapsed = Math.max( timeWindow / ( 1000 * 60 * 60 ), 1 / 60 ); // min 1 minute + const messagesPerHour = messageCount / hoursElapsed; - if (messagesPerHour > config.maxMessagesPerHour) { - const resetTime = Date.now() + (60 * 60 * 1000); // 1 hour from now + if ( messagesPerHour > config.maxMessagesPerHour ) { + const resetTime = Date.now() + ( 60 * 60 * 1000 ); // 1 hour from now return { allowed: false, reason: `Rate limit exceeded. Maximum ${config.maxMessagesPerHour} messages per hour.`, @@ -162,16 +162,16 @@ export function calculateMessageCost( inputTokens: number, outputTokens: number, reasoningTokens: number = 0, - modelId: string = "us.anthropic.claude-3-5-haiku-20241022-v1:0" + modelId: string = "anthropic.claude-haiku-4-5-20251001-v1:0" ): number { // Claude Haiku 4.5 pricing (as of 2024) const inputCostPer1K = 0.00025; // $0.00025 per 1K input tokens const outputCostPer1K = 0.00125; // $0.00125 per 1K output tokens const reasoningCostPer1K = 0.00025; // Same as input for reasoning - const inputCost = (inputTokens / 1000) * inputCostPer1K; - const outputCost = (outputTokens / 1000) * outputCostPer1K; - const reasoningCost = (reasoningTokens / 1000) * reasoningCostPer1K; + const inputCost = ( inputTokens / 1000 ) * inputCostPer1K; + const outputCost = ( outputTokens / 1000 ) * outputCostPer1K; + const reasoningCost = ( reasoningTokens / 1000 ) * reasoningCostPer1K; return inputCost + outputCost + reasoningCost; } @@ -185,21 +185,21 @@ export function checkCostLimits( userCostThisHour: number, config: GuardrailConfig = DEFAULT_GUARDRAILS ): { allowed: boolean; reason?: string } { - if (estimatedCost > config.costLimits.maxCostPerMessage) { + if ( estimatedCost > config.costLimits.maxCostPerMessage ) { return { allowed: false, - reason: `Message cost ($${estimatedCost.toFixed(4)}) exceeds limit ($${config.costLimits.maxCostPerMessage})`, + reason: `Message cost ($${estimatedCost.toFixed( 4 )}) exceeds limit ($${config.costLimits.maxCostPerMessage})`, }; } - if (userCostThisHour + estimatedCost > config.costLimits.maxCostPerHour) { + if ( userCostThisHour + estimatedCost > config.costLimits.maxCostPerHour ) { return { allowed: false, reason: `Hourly cost limit ($${config.costLimits.maxCostPerHour}) would be exceeded`, }; } - if (userCostToday + estimatedCost > config.costLimits.maxCostPerDay) { + if ( userCostToday + estimatedCost > config.costLimits.maxCostPerDay ) { return { allowed: false, reason: `Daily cost limit ($${config.costLimits.maxCostPerDay}) would be exceeded`, @@ -212,7 +212,7 @@ export function checkCostLimits( /** * Sanitize system prompt to prevent prompt injection */ -export function sanitizeSystemPrompt(prompt: string): string { +export function sanitizeSystemPrompt( prompt: string ): string { // Remove potential prompt injection patterns const dangerousPatterns = [ /ignore\s+previous\s+instructions/gi, @@ -225,8 +225,8 @@ export function sanitizeSystemPrompt(prompt: string): string { ]; let sanitized = prompt; - for (const pattern of dangerousPatterns) { - sanitized = sanitized.replace(pattern, "[FILTERED]"); + for ( const pattern of dangerousPatterns ) { + sanitized = sanitized.replace( pattern, "[FILTERED]" ); } return sanitized; @@ -241,18 +241,18 @@ export function validateToolUsage( config: GuardrailConfig = DEFAULT_GUARDRAILS ): { allowed: boolean; reason?: string; requiresApproval: boolean } { // Check if tool requires approval - const requiresApproval = config.requireApprovalFor.some(category => - toolName.toLowerCase().includes(category.toLowerCase()) + const requiresApproval = config.requireApprovalFor.some( category => + toolName.toLowerCase().includes( category.toLowerCase() ) ); // Check for dangerous tool parameters - if (toolName === "web_search" || toolName === "http_request") { + if ( toolName === "web_search" || toolName === "http_request" ) { const url = parameters.url || parameters.query; - if (url) { - const domain = extractDomain(url); - if (domain && !config.allowedDomains.some(allowed => - domain.includes(allowed) || allowed.includes(domain) - )) { + if ( url ) { + const domain = extractDomain( url ); + if ( domain && !config.allowedDomains.some( allowed => + domain.includes( allowed ) || allowed.includes( domain ) + ) ) { return { allowed: false, reason: `Domain ${domain} not in allowed list`, @@ -263,7 +263,7 @@ export function validateToolUsage( } // Check for file system access - if (toolName.includes("file") || toolName.includes("write") || toolName.includes("delete")) { + if ( toolName.includes( "file" ) || toolName.includes( "write" ) || toolName.includes( "delete" ) ) { return { allowed: false, reason: "File system access not permitted", @@ -277,9 +277,9 @@ export function validateToolUsage( /** * Extract domain from URL */ -function extractDomain(url: string): string | null { +function extractDomain( url: string ): string | null { try { - const parsed = new URL(url); + const parsed = new URL( url ); return parsed.hostname; } catch { return null; @@ -305,27 +305,27 @@ export function generateGuardrailReport( let requiresApproval = false; // Collect message warnings - warnings.push(...messageValidation.warnings); - if (!messageValidation.allowed) { - errors.push(messageValidation.reason!); + warnings.push( ...messageValidation.warnings ); + if ( !messageValidation.allowed ) { + errors.push( messageValidation.reason! ); } // Check rate limits - if (!rateLimitCheck.allowed) { - errors.push(rateLimitCheck.reason!); + if ( !rateLimitCheck.allowed ) { + errors.push( rateLimitCheck.reason! ); } // Check cost limits - if (!costCheck.allowed) { - errors.push(costCheck.reason!); + if ( !costCheck.allowed ) { + errors.push( costCheck.reason! ); } // Check tool validations - for (const toolValidation of toolValidations) { - if (!toolValidation.allowed) { - errors.push(toolValidation.reason!); + for ( const toolValidation of toolValidations ) { + if ( !toolValidation.allowed ) { + errors.push( toolValidation.reason! ); } - if (toolValidation.requiresApproval) { + if ( toolValidation.requiresApproval ) { requiresApproval = true; } } diff --git a/convex/http.ts b/convex/http.ts index 960cae4..52ee3f9 100644 --- a/convex/http.ts +++ b/convex/http.ts @@ -1,8 +1,9 @@ import { httpRouter } from "convex/server"; import { httpAction } from "./_generated/server"; import { auth } from "./auth"; -import { api } from "./_generated/api"; +import { api, internal } from "./_generated/api"; import { validateEnvironment } from "./envValidator"; +import type Stripe from "stripe"; // Validate environment variables at module load time // This ensures critical configuration is present before handling requests @@ -411,4 +412,163 @@ http.route({ }), }); +// ─── Stripe Webhook ────────────────────────────────────────────────────────── +// Receives events from Stripe and updates user subscription state. +// Signature verification prevents forged webhook payloads. +// +// Handled events (7 total — register ALL in Stripe Dashboard → Webhooks): +// checkout.session.completed → Activate subscription on first checkout +// customer.subscription.updated → Sync plan changes / renewals +// customer.subscription.deleted → Downgrade to freemium on cancellation +// invoice.paid → Reset monthly usage counters +// invoice.payment_failed → Mark subscription past_due (gate blocks access) +// charge.dispute.created → Restrict account immediately (chargeback protection) +// charge.refunded → Log refund for monitoring + +http.route({ + path: "/stripe/webhook", + method: "POST", + handler: httpAction(async (ctx, request) => { + const { default: Stripe } = await import("stripe"); + + const secretKey = process.env.STRIPE_SECRET_KEY; + const webhookSecret = process.env.STRIPE_WEBHOOK_SECRET; + + if (!secretKey || !webhookSecret) { + console.error("Stripe webhook: Missing STRIPE_SECRET_KEY or STRIPE_WEBHOOK_SECRET env vars"); + return new Response("Server configuration error", { status: 500 }); + } + + const stripe = new Stripe(secretKey); + const signature = request.headers.get("stripe-signature"); + + if (!signature) { + return new Response("Missing stripe-signature header", { status: 400 }); + } + + let event: Stripe.Event; + try { + const body = await request.text(); + event = stripe.webhooks.constructEvent(body, signature, webhookSecret); + } catch (err: any) { + console.error(`Stripe webhook signature verification failed: ${err.message}`); + return new Response(`Webhook signature error: ${err.message}`, { status: 400 }); + } + + // Route events to internal mutations + try { + switch (event.type) { + case "checkout.session.completed": { + const session = event.data.object as Stripe.Checkout.Session; + if (session.customer && session.subscription) { + // Fetch subscription to get period end + // Note: current_period_end removed from Stripe SDK v20 types but still in API response + const sub = await stripe.subscriptions.retrieve( + session.subscription as string + ) as any; + await ctx.runMutation(internal.stripeMutations.updateSubscription, { + stripeCustomerId: session.customer as string, + subscriptionId: sub.id, + status: sub.status, + currentPeriodEnd: sub.current_period_end as number, + }); + } + break; + } + + case "customer.subscription.updated": { + const sub = event.data.object as any; // Stripe.Subscription - cast for current_period_end + await ctx.runMutation(internal.stripeMutations.updateSubscription, { + stripeCustomerId: sub.customer as string, + subscriptionId: sub.id, + status: sub.status, + currentPeriodEnd: sub.current_period_end as number, + }); + break; + } + + case "customer.subscription.deleted": { + const sub = event.data.object as Stripe.Subscription; + await ctx.runMutation(internal.stripeMutations.cancelSubscription, { + stripeCustomerId: sub.customer as string, + }); + break; + } + + case "invoice.paid": { + const invoice = event.data.object as Stripe.Invoice; + if (invoice.customer) { + await ctx.runMutation(internal.stripeMutations.resetMonthlyUsage, { + stripeCustomerId: invoice.customer as string, + periodStart: invoice.period_start, + }); + } + break; + } + + case "invoice.payment_failed": { + const invoice = event.data.object as Stripe.Invoice; + if (invoice.customer) { + await ctx.runMutation(internal.stripeMutations.markPastDue, { + stripeCustomerId: invoice.customer as string, + }); + } + break; + } + + case "charge.dispute.created": { + // Stripe Dispute: customer is on the linked charge, not top-level. + // Use the raw event data which includes customer as a string. + const disputeData = event.data.object as any; + const disputeCustomer: string | undefined = + typeof disputeData.customer === "string" + ? disputeData.customer + : typeof disputeData.charge === "string" + ? undefined // charge ID only — need to look up; skip for now + : disputeData.charge?.customer; + if ( disputeCustomer ) { + await ctx.runMutation(internal.stripeMutations.restrictAccountForDispute, { + stripeCustomerId: disputeCustomer, + }); + } + break; + } + + case "charge.refunded": { + const charge = event.data.object as Stripe.Charge; + const refundCustomer = typeof charge.customer === "string" + ? charge.customer + : typeof charge.customer === "object" && charge.customer !== null + ? charge.customer.id + : undefined; + if ( refundCustomer ) { + await ctx.runMutation(internal.stripeMutations.handleChargeRefund, { + stripeCustomerId: refundCustomer, + amountRefunded: charge.amount_refunded, + }); + } + break; + } + + default: + // Unhandled event type - return 200 per Stripe best practices + break; + } + } catch (handlerError: any) { + // Return 500 so Stripe retries — mutations should be idempotent. + // Log event.id to correlate retries and detect repeated failures. + console.error(`Stripe webhook handler error for event ${event.id} (${event.type}): ${handlerError.message}`, handlerError); + return new Response( + JSON.stringify({ error: "Webhook handler error", eventId: event.id }), + { status: 500, headers: { "Content-Type": "application/json" } } + ); + } + + return new Response(JSON.stringify({ received: true }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + }), +}); + export default http; diff --git a/convex/integration.test.ts b/convex/integration.test.ts index 4dc1e9d..8593530 100644 --- a/convex/integration.test.ts +++ b/convex/integration.test.ts @@ -1359,7 +1359,7 @@ describe("Deployment Integration Tests", () => { email: "tier1-all@example.com", name: "Tier 1 All Tests User", tier: "freemium", - testsThisMonth: 0, + executionsThisMonth: 0, createdAt: Date.now(), }); }); @@ -1407,7 +1407,7 @@ describe("Deployment Integration Tests", () => { email: "tier1-usage-limits@example.com", name: "Tier 1 Usage Limits User", tier: "freemium", - testsThisMonth: 0, + executionsThisMonth: 0, createdAt: Date.now(), }); }); @@ -1429,19 +1429,19 @@ describe("Deployment Integration Tests", () => { // Test usage increment const userBefore = await t.query(api.deploymentRouter.getUserTier); - const initialUsage = userBefore?.testsThisMonth || 0; + const initialUsage = userBefore?.executionsThisMonth || 0; await t.mutation(api.deploymentRouter.incrementUsage, { userId: testUserId, }); const userAfter = await t.query(api.deploymentRouter.getUserTier); - expect(userAfter?.testsThisMonth).toBe(initialUsage + 1); + expect(userAfter?.executionsThisMonth).toBe(initialUsage + 1); // Test usage limit enforcement await t.run(async (ctx) => { await ctx.db.patch(testUserId, { - testsThisMonth: 10, // At the limit + executionsThisMonth: 10, // At the limit }); }); @@ -1958,7 +1958,7 @@ describe("Deployment Integration Tests", () => { email: "freemium1@example.com", name: "Freemium User 1", tier: "freemium", - testsThisMonth: 5, + executionsThisMonth: 5, createdAt: Date.now(), }); @@ -1967,7 +1967,7 @@ describe("Deployment Integration Tests", () => { email: "freemium2@example.com", name: "Freemium User 2", tier: "freemium", - testsThisMonth: 8, + executionsThisMonth: 8, createdAt: Date.now(), }); }); @@ -1987,7 +1987,7 @@ describe("Deployment Integration Tests", () => { }); users.forEach((user) => { - expect(user.testsThisMonth).toBe(0); + expect(user.executionsThisMonth).toBe(0); }); }); @@ -2001,7 +2001,7 @@ describe("Deployment Integration Tests", () => { email: "personal1@example.com", name: "Personal User 1", tier: "personal", - testsThisMonth: 50, + executionsThisMonth: 50, createdAt: Date.now(), }); }); @@ -2017,7 +2017,7 @@ describe("Deployment Integration Tests", () => { .first(); }); - expect(user?.testsThisMonth).toBe(50); + expect(user?.executionsThisMonth).toBe(50); }); }); }); diff --git a/convex/interleavedReasoning.ts b/convex/interleavedReasoning.ts index 7f77419..39d4da4 100644 --- a/convex/interleavedReasoning.ts +++ b/convex/interleavedReasoning.ts @@ -15,20 +15,20 @@ const SLIDING_WINDOW_SIZE = 10; // Keep last 10 messages in active window /** * Create a new conversation with interleaved reasoning */ -export const createConversation = mutation({ +export const createConversation = mutation( { args: { - title: v.optional(v.string()), - systemPrompt: v.optional(v.string()), + title: v.optional( v.string() ), + systemPrompt: v.optional( v.string() ), }, - handler: async (ctx, args) => { - const userId = await getAuthUserId(ctx); - + handler: async ( ctx: any, args: { title?: string; systemPrompt?: string } ): Promise<{ conversationId: string; conversationToken?: string }> => { + const userId = await getAuthUserId( ctx ); + // Generate token for anonymous users - const conversationToken = userId - ? undefined - : `anon_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; + const conversationToken = userId + ? undefined + : `anon_${Date.now()}_${Math.random().toString( 36 ).substr( 2, 9 )}`; - const conversationId = await ctx.db.insert("interleavedConversations", { + const conversationId = await ctx.db.insert( "interleavedConversations", { userId: userId || undefined, conversationToken, title: args.title || "New Agent", @@ -47,79 +47,77 @@ YOU HAVE UNLIMITED CAPABILITIES to accomplish this: - Performance and cost analysis YOUR WORKFLOW: -1. Deeply understand requirements through questions -2. Research and analyze best approaches -3. Design optimal agent architecture -4. Create necessary tools and integrations -5. Generate complete, production-ready code -6. Validate implementation quality - -AGENT BUILDING PRINCIPLES: -- Build intelligent, workflow-oriented agents (not simple chatbots) -- Create custom tools when needed using @tool decorator -- Include preprocessing/postprocessing hooks for complex logic -- Generate ALL 4 required files: agent.py, mcp.json, Dockerfile, cloudformation.yaml -- Ensure production-ready code with error handling and logging -- Optimize for performance, cost, and scalability - -Think deeply, research thoroughly, and build exceptional agents.`, +- Test agents by simulating real-world scenarios and edge cases +- Provide detailed feedback on agent performance and behavior +- Suggest specific improvements when agents fail or underperform +- Help users understand agent capabilities and limitations +- Analyze conversation patterns to identify optimization opportunities + +AGENT TESTING PRINCIPLES: +- Thoroughly test all agent capabilities and tools +- Document unexpected behaviors and edge cases +- Provide actionable recommendations for improvements +- Help users iterate and refine their agents +- Consider cost, performance, and accuracy tradeoffs + +Think deeply about agent behavior and provide thoughtful testing insights.`, messageCount: 0, contextSize: 0, s3ContextKey: undefined, createdAt: Date.now(), updatedAt: Date.now(), isActive: true, - }); + } ); return { conversationId, conversationToken }; }, -}); +} ); /** * Send a message with interleaved reasoning */ -export const sendMessage: any = action({ +export const sendMessage: any = action( { args: { - conversationId: v.id("interleavedConversations"), - conversationToken: v.optional(v.string()), + conversationId: v.id( "interleavedConversations" ), + conversationToken: v.optional( v.string() ), message: v.string(), }, - handler: async (ctx, args) => { - const userId = await getAuthUserId(ctx); + handler: async ( ctx: any, args: any ): Promise => { + const userId = await getAuthUserId( ctx ); // Get conversation - const conversation = await ctx.runQuery(internal.interleavedReasoning.getConversationInternal, { + const conversation = await ctx.runQuery( internal.interleavedReasoning.getConversationInternal, { conversationId: args.conversationId, userId: userId || undefined, conversationToken: args.conversationToken, - }); + } ); - if (!conversation) { - throw new Error("Conversation not found or access denied"); + if ( !conversation ) { + throw new Error( "Conversation not found or access denied" ); } // NO GUARDRAILS - removed all validation/rate-limiting queries // Trust the user, minimize database operations // Get conversation history BEFORE adding new message - const history = await ctx.runQuery(internal.interleavedReasoning.getConversationHistory, { + const history = await ctx.runQuery( internal.interleavedReasoning.getConversationHistory, { conversationId: args.conversationId, windowSize: SLIDING_WINDOW_SIZE, - }); + } ); // If conversation has an associated agent, use strands-agents execution // Otherwise, fall back to direct Claude invocation let response; - if (conversation.agentId) { + if ( conversation.agentId ) { // Use strands-agents framework via AgentCore - const agentResult = await ctx.runAction(api.strandsAgentExecution.executeAgentWithStrandsAgents, { + const agentResult = await ctx.runAction( api.strandsAgentExecution.executeAgentWithStrandsAgents, { agentId: conversation.agentId, conversationId: args.conversationId, message: args.message, - }); + } ); - if (!agentResult.success) { - throw new Error(agentResult.error || "Agent execution failed"); + if ( !agentResult.success ) { + throw new Error( agentResult.error || "Agent execution failed" ); } response = { @@ -128,30 +126,52 @@ export const sendMessage: any = action({ toolCalls: agentResult.toolCalls, }; } else { + // Gate: enforce tier-based Bedrock access before direct Claude invocation + const { requireBedrockAccess } = await import( "./lib/bedrockGate" ); + const modelId = process.env.AGENT_BUILDER_MODEL_ID || "anthropic.claude-haiku-4-5-20251001-v1:0"; + const gateResult = await requireBedrockAccess( + ctx, + modelId, + async ( args ) => ctx.runQuery( internal.users.getInternal, args ), + ); + if ( !gateResult.allowed ) { + throw new Error( gateResult.reason ); + } + // Fall back to direct Claude Haiku 4.5 with interleaved thinking response = await invokeClaudeWithInterleavedThinking( conversation.systemPrompt, history, args.message ); + + // Meter token usage for billing + if ( response.tokenUsage && gateResult.allowed ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateResult.userId as any, + modelId, + inputTokens: response.tokenUsage.inputTokens, + outputTokens: response.tokenUsage.outputTokens, + } ); + } } // Batch insert BOTH messages in a single mutation (1 WRITE PER TURN) - await ctx.runMutation(internal.interleavedReasoning.addMessageBatch, { + await ctx.runMutation( internal.interleavedReasoning.addMessageBatch, { conversationId: args.conversationId, messages: [ { - role: "user" as const, + role: "user", content: args.message, }, { - role: "assistant" as const, + role: "assistant", content: response.content, reasoning: response.reasoning, toolCalls: response.toolCalls, }, ], - }); + } ); // NO automatic S3 offload - let it be triggered manually or by separate process // This keeps sendMessage purely event-driven with zero scheduled tasks @@ -162,45 +182,45 @@ export const sendMessage: any = action({ toolCalls: response.toolCalls, }; }, -}); +} ); /** * Internal helper: count user messages within a time window */ -export const getUserMessageCount = internalQuery({ +export const getUserMessageCount = internalQuery( { args: { - userId: v.optional(v.id("users")), - conversationToken: v.optional(v.string()), + userId: v.optional( v.id( "users" ) ), + conversationToken: v.optional( v.string() ), timeWindow: v.number(), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { const since = Date.now() - args.timeWindow; let conversations: any[] = []; - if (args.userId) { + if ( args.userId ) { conversations = await ctx.db - .query("interleavedConversations") - .withIndex("by_user", (q) => q.eq("userId", args.userId!)) + .query( "interleavedConversations" ) + .withIndex( "by_user", ( q ) => q.eq( "userId", args.userId ) ) .collect(); - } else if (args.conversationToken) { + } else if ( args.conversationToken ) { const conversation = await ctx.db - .query("interleavedConversations") - .withIndex("by_token", (q) => q.eq("conversationToken", args.conversationToken!)) + .query( "interleavedConversations" ) + .withIndex( "by_token", ( q ) => q.eq( "conversationToken", args.conversationToken ) ) .first(); - if (conversation) { + if ( conversation ) { conversations = [conversation]; } } - if (conversations.length === 0) { + if ( conversations.length === 0 ) { return 0; } let count = 0; - for (const conversation of conversations) { + for ( const conversation of conversations ) { const messages: any[] = conversation.messages || []; - for (const message of messages) { - if (message.timestamp && message.timestamp >= since) { + for ( const message of messages ) { + if ( message.timestamp && message.timestamp >= since ) { count += 1; } } @@ -208,22 +228,22 @@ export const getUserMessageCount = internalQuery({ return count; }, -}); +} ); /** * Internal helper: aggregate user cost for current day * Currently returns 0 until detailed cost tracking is implemented. */ -export const getUserCostToday = internalQuery({ +export const getUserCostToday = internalQuery( { args: { - userId: v.optional(v.id("users")), - conversationToken: v.optional(v.string()), + userId: v.optional( v.id( "users" ) ), + conversationToken: v.optional( v.string() ), }, handler: async () => { // Cost tracking is not yet implemented; return 0 so guardrails remain permissive. return 0; }, -}); +} ); /** * Invoke Claude Haiku 4.5 with interleaved thinking @@ -232,33 +252,40 @@ async function invokeClaudeWithInterleavedThinking( systemPrompt: string, history: any[], userMessage: string -): Promise<{ content: string; reasoning?: string; toolCalls?: any[] }> { - const { BedrockRuntimeClient, InvokeModelCommand } = await import("@aws-sdk/client-bedrock-runtime"); +): Promise<{ content: string; reasoning?: string; toolCalls?: any[]; tokenUsage: { inputTokens: number; outputTokens: number; totalTokens: number } }> { + const { BedrockRuntimeClient, InvokeModelCommand } = await import( "@aws-sdk/client-bedrock-runtime" ); + + const accessKeyId = process.env.AWS_ACCESS_KEY_ID; + const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY; - const client = new BedrockRuntimeClient({ + if ( !accessKeyId || !secretAccessKey ) { + throw new Error( "Missing AWS credentials: ensure AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are set in the environment" ); + } + + const client = new BedrockRuntimeClient( { region: process.env.AWS_REGION || "us-east-1", credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, + accessKeyId, + secretAccessKey, }, - }); + } ); // Build messages array const messages: any[] = []; - + // Add conversation history - for (const msg of history) { - messages.push({ + for ( const msg of history ) { + messages.push( { role: msg.role, content: [{ text: msg.content }], - }); + } ); } // Add current user message - messages.push({ + messages.push( { role: "user", content: [{ text: userMessage }], - }); + } ); // Prepare request with interleaved thinking enabled const payload = { @@ -274,32 +301,38 @@ async function invokeClaudeWithInterleavedThinking( }, }; - const command = new InvokeModelCommand({ - modelId: "us.anthropic.claude-3-5-haiku-20241022-v1:0", // Claude Haiku 4.5 + const modelId = process.env.AGENT_BUILDER_MODEL_ID || "anthropic.claude-haiku-4-5-20251001-v1:0"; + + const command = new InvokeModelCommand( { + modelId, contentType: "application/json", accept: "application/json", - body: JSON.stringify(payload), - }); + body: JSON.stringify( payload ), + } ); + + const response: any = await client.send( command ); + const responseBody = JSON.parse( new TextDecoder().decode( response.body ) ); - const response: any = await client.send(command); - const responseBody = JSON.parse(new TextDecoder().decode(response.body)); + // Extract token usage for billing + const { extractTokenUsage } = await import( "./lib/tokenBilling" ); + const tokenUsage = extractTokenUsage( responseBody, modelId ); // Extract content and reasoning let content = ""; let reasoning = ""; const toolCalls: any[] = []; - for (const block of responseBody.content || []) { - if (block.type === "text") { + for ( const block of responseBody.content || [] ) { + if ( block.type === "text" ) { content += block.text; - } else if (block.type === "thinking") { + } else if ( block.type === "thinking" ) { reasoning += block.thinking; - } else if (block.type === "tool_use") { - toolCalls.push({ + } else if ( block.type === "tool_use" ) { + toolCalls.push( { id: block.id, name: block.name, input: block.input, - }); + } ); } } @@ -307,6 +340,7 @@ async function invokeClaudeWithInterleavedThinking( content: content.trim(), reasoning: reasoning.trim() || undefined, toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + tokenUsage, }; } @@ -314,23 +348,23 @@ async function invokeClaudeWithInterleavedThinking( * Add message to conversation (internal) * Now inserts individual message documents instead of rewriting entire array */ -export const addMessage = internalMutation({ +export const addMessage = internalMutation( { args: { - conversationId: v.id("interleavedConversations"), - role: v.union(v.literal("user"), v.literal("assistant")), + conversationId: v.id( "interleavedConversations" ), + role: v.union( v.literal( "user" ), v.literal( "assistant" ) ), content: v.string(), - reasoning: v.optional(v.string()), - toolCalls: v.optional(v.any()), + reasoning: v.optional( v.string() ), + toolCalls: v.optional( v.any() ), }, - handler: async (ctx, args) => { - const conversation = await ctx.db.get(args.conversationId); - if (!conversation) { - throw new Error("Conversation not found"); + handler: async ( ctx, args ) => { + const conversation = await ctx.db.get( args.conversationId ); + if ( !conversation ) { + throw new Error( "Conversation not found" ); } const timestamp = Date.now(); - await ctx.db.insert("interleavedMessages", { + await ctx.db.insert( "interleavedMessages", { conversationId: args.conversationId, role: args.role, content: args.content, @@ -338,357 +372,370 @@ export const addMessage = internalMutation({ toolCalls: args.toolCalls, timestamp, sequenceNumber: timestamp, // Use timestamp as sequence for ordering - }); + } ); - await ctx.db.patch(args.conversationId, { - messageCount: (conversation.messageCount ?? 0) + 1, + await ctx.db.patch( args.conversationId, { + messageCount: ( conversation.messageCount ?? 0 ) + 1, updatedAt: timestamp, - }); + } ); }, -}); +} ); /** * Add multiple messages in a single transaction (BATCH INSERT) * Used to add user + assistant messages together = 1 WRITE PER TURN */ -export const addMessageBatch = internalMutation({ +export const addMessageBatch = internalMutation( { args: { - conversationId: v.id("interleavedConversations"), - messages: v.array(v.object({ - role: v.union(v.literal("user"), v.literal("assistant")), + conversationId: v.id( "interleavedConversations" ), + messages: v.array( v.object( { + role: v.union( v.literal( "user" ), v.literal( "assistant" ) ), content: v.string(), - reasoning: v.optional(v.string()), - toolCalls: v.optional(v.any()), - })), - }, - handler: async (ctx, args) => { - const conversation = await ctx.db.get(args.conversationId); - if (!conversation) { - throw new Error("Conversation not found"); + reasoning: v.optional( v.string() ), + toolCalls: v.optional( v.any() ), + } ) ), + }, + handler: async ( ctx, args ) => { + const conversation = await ctx.db.get( args.conversationId ); + if ( !conversation ) { + throw new Error( "Conversation not found" ); } - const timestamp = Date.now(); + const baseTimestamp = Date.now(); - for (const msg of args.messages) { - await ctx.db.insert("interleavedMessages", { + for ( let i = 0; i < args.messages.length; i++ ) { + const msg = args.messages[i]; + await ctx.db.insert( "interleavedMessages", { conversationId: args.conversationId, role: msg.role, content: msg.content, reasoning: msg.reasoning, toolCalls: msg.toolCalls, - timestamp, - sequenceNumber: timestamp, // Use timestamp as sequence for ordering - }); + timestamp: baseTimestamp, + sequenceNumber: baseTimestamp + i, // Monotonically increasing for deterministic batch ordering + } ); } - await ctx.db.patch(args.conversationId, { - messageCount: (conversation.messageCount ?? 0) + args.messages.length, - updatedAt: timestamp, - }); + await ctx.db.patch( args.conversationId, { + messageCount: ( conversation.messageCount ?? 0 ) + args.messages.length, + updatedAt: baseTimestamp, + } ); }, -}); +} ); /** * Get conversation history with sliding window (internal) * Now fetches from interleavedMessages table instead of embedded array */ -export const getConversationHistory = internalQuery({ +export const getConversationHistory = internalQuery( { args: { - conversationId: v.id("interleavedConversations"), - windowSize: v.optional(v.number()), + conversationId: v.id( "interleavedConversations" ), + windowSize: v.optional( v.number() ), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { const windowSize = args.windowSize || SLIDING_WINDOW_SIZE; // Fetch all messages and take last N (sliding window) const allMessages = await ctx.db - .query("interleavedMessages") - .withIndex("by_timestamp", (q) => q.eq("conversationId", args.conversationId)) - .order("desc") - .take(windowSize); + .query( "interleavedMessages" ) + .withIndex( "by_timestamp", ( q ) => q.eq( "conversationId", args.conversationId ) ) + .order( "desc" ) + .take( windowSize ); // Return in chronological order return allMessages.reverse(); }, -}); +} ); /** * Get conversation (internal - no auth check) */ -export const getConversationInternal = internalQuery({ +export const getConversationInternal = internalQuery( { args: { - conversationId: v.id("interleavedConversations"), - userId: v.optional(v.id("users")), - conversationToken: v.optional(v.string()), + conversationId: v.id( "interleavedConversations" ), + userId: v.optional( v.id( "users" ) ), + conversationToken: v.optional( v.string() ), }, - handler: async (ctx, args) => { - const conversation = await ctx.db.get(args.conversationId); - if (!conversation) { + handler: async ( ctx, args ) => { + const conversation = await ctx.db.get( args.conversationId ); + if ( !conversation ) { return null; } // Check access: either user owns it or has the token - if (args.userId && conversation.userId === args.userId) { + if ( args.userId && conversation.userId === args.userId ) { return conversation; } - if (args.conversationToken && conversation.conversationToken === args.conversationToken) { + if ( args.conversationToken && conversation.conversationToken === args.conversationToken ) { return conversation; } return null; }, -}); +} ); /** * Get context size (internal) - computed on-demand from messages */ -export const getContextSize = internalQuery({ +export const getContextSize = internalQuery( { args: { - conversationId: v.id("interleavedConversations"), + conversationId: v.id( "interleavedConversations" ), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { // Compute context size by summing message lengths const messages = await ctx.db - .query("interleavedMessages") - .withIndex("by_conversation", (q) => q.eq("conversationId", args.conversationId)) + .query( "interleavedMessages" ) + .withIndex( "by_conversation", ( q ) => q.eq( "conversationId", args.conversationId ) ) .collect(); return messages.reduce( - (sum, msg) => sum + msg.content.length + (msg.reasoning?.length || 0), + ( sum, msg ) => sum + msg.content.length + ( msg.reasoning?.length || 0 ), 0 ); }, -}); +} ); /** * Get all messages for S3 archival (internal) */ -export const getAllMessages = internalQuery({ +export const getAllMessages = internalQuery( { args: { - conversationId: v.id("interleavedConversations"), + conversationId: v.id( "interleavedConversations" ), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { const messages = await ctx.db - .query("interleavedMessages") - .withIndex("by_conversation", (q) => q.eq("conversationId", args.conversationId)) + .query( "interleavedMessages" ) + .withIndex( "by_conversation", ( q ) => q.eq( "conversationId", args.conversationId ) ) .collect(); - return messages.sort((a, b) => a.sequenceNumber - b.sequenceNumber); + return messages.sort( ( a, b ) => a.sequenceNumber - b.sequenceNumber ); }, -}); +} ); /** * Check context size and offload to S3 if needed (background task) */ -export const checkAndOffloadToS3 = internalAction({ +export const checkAndOffloadToS3 = internalAction( { args: { - conversationId: v.id("interleavedConversations"), + conversationId: v.id( "interleavedConversations" ), }, - handler: async (ctx, args) => { - const contextSize = await ctx.runQuery(internal.interleavedReasoning.getContextSize, { + handler: async ( ctx, args ) => { + const contextSize = await ctx.runQuery( internal.interleavedReasoning.getContextSize, { conversationId: args.conversationId, - }); + } ); - if (contextSize > S3_THRESHOLD) { - await ctx.runAction(internal.interleavedReasoning.moveContextToS3, { + if ( contextSize > S3_THRESHOLD ) { + await ctx.runAction( internal.interleavedReasoning.moveContextToS3, { conversationId: args.conversationId, - }); + } ); } }, -}); +} ); /** * Move large context to S3 (internal) * Now fetches messages from interleavedMessages table */ -export const moveContextToS3 = internalAction({ +export const moveContextToS3 = internalAction( { args: { - conversationId: v.id("interleavedConversations"), + conversationId: v.id( "interleavedConversations" ), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { // Fetch conversation metadata - const conversation = await ctx.runQuery(internal.interleavedReasoning.getConversationById, { + const conversation = await ctx.runQuery( internal.interleavedReasoning.getConversationById, { conversationId: args.conversationId, - }); + } ); - if (!conversation) { + if ( !conversation ) { return; } // Fetch all messages from interleavedMessages table - const messages = await ctx.runQuery(internal.interleavedReasoning.getAllMessages, { + const messages = await ctx.runQuery( internal.interleavedReasoning.getAllMessages, { conversationId: args.conversationId, - }); + } ); - if (!messages || messages.length === 0) { + if ( !messages || messages.length === 0 ) { return; } // Upload to S3 - const { S3Client, PutObjectCommand } = await import("@aws-sdk/client-s3"); + const { S3Client, PutObjectCommand } = await import( "@aws-sdk/client-s3" ); + + const s3AccessKeyId = process.env.AWS_ACCESS_KEY_ID; + const s3SecretAccessKey = process.env.AWS_SECRET_ACCESS_KEY; - const s3Client = new S3Client({ + if ( !s3AccessKeyId || !s3SecretAccessKey ) { + throw new Error( "Missing AWS credentials for S3: ensure AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are set" ); + } + + const s3Client = new S3Client( { region: process.env.AWS_REGION || "us-east-1", credentials: { - accessKeyId: process.env.AWS_ACCESS_KEY_ID!, - secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, + accessKeyId: s3AccessKeyId, + secretAccessKey: s3SecretAccessKey, }, - }); + } ); + + const s3Bucket = process.env.AWS_S3_BUCKET; + if ( !s3Bucket ) { + throw new Error( "Missing AWS_S3_BUCKET environment variable: ensure it is set before offloading context to S3" ); + } const s3Key = `conversations/${args.conversationId}/context_${Date.now()}.json`; - await s3Client.send(new PutObjectCommand({ - Bucket: process.env.AWS_S3_BUCKET!, + await s3Client.send( new PutObjectCommand( { + Bucket: s3Bucket, Key: s3Key, - Body: JSON.stringify(messages), + Body: JSON.stringify( messages ), ContentType: "application/json", Metadata: { conversationId: args.conversationId, userId: conversation.userId || "anonymous", timestamp: Date.now().toString(), }, - })); + } ) ); // Update conversation with S3 reference - await ctx.runMutation(internal.interleavedReasoning.updateS3Reference, { + await ctx.runMutation( internal.interleavedReasoning.updateS3Reference, { conversationId: args.conversationId, s3Key, - }); + } ); // Keep only recent messages in Convex (sliding window) - await ctx.runMutation(internal.interleavedReasoning.trimMessages, { + await ctx.runMutation( internal.interleavedReasoning.trimMessages, { conversationId: args.conversationId, keepLast: SLIDING_WINDOW_SIZE, - }); + } ); }, -}); +} ); /** * Update S3 reference (internal) */ -export const updateS3Reference = internalMutation({ +export const updateS3Reference = internalMutation( { args: { - conversationId: v.id("interleavedConversations"), + conversationId: v.id( "interleavedConversations" ), s3Key: v.string(), }, - handler: async (ctx, args) => { - await ctx.db.patch(args.conversationId, { + handler: async ( ctx, args ) => { + await ctx.db.patch( args.conversationId, { s3ContextKey: args.s3Key, updatedAt: Date.now(), - }); + } ); }, -}); +} ); /** * Trim messages to keep only recent ones (internal) * Now deletes old message documents from interleavedMessages table */ -export const trimMessages = internalMutation({ +export const trimMessages = internalMutation( { args: { - conversationId: v.id("interleavedConversations"), + conversationId: v.id( "interleavedConversations" ), keepLast: v.number(), }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { // Get all messages sorted by timestamp (desc) const allMessages = await ctx.db - .query("interleavedMessages") - .withIndex("by_timestamp", (q) => q.eq("conversationId", args.conversationId)) - .order("desc") + .query( "interleavedMessages" ) + .withIndex( "by_timestamp", ( q ) => q.eq( "conversationId", args.conversationId ) ) + .order( "desc" ) .collect(); // Keep last N, delete the rest - const toDelete = allMessages.slice(args.keepLast); + const toDelete = allMessages.slice( args.keepLast ); // Delete old messages - for (const msg of toDelete) { - await ctx.db.delete(msg._id); + for ( const msg of toDelete ) { + await ctx.db.delete( msg._id ); } // NO conversation patch - contextSize computed on-demand // This is a pure cleanup operation with minimal writes }, -}); +} ); /** * Get conversation by ID (internal) */ -export const getConversationById = internalQuery({ +export const getConversationById = internalQuery( { args: { - conversationId: v.id("interleavedConversations"), + conversationId: v.id( "interleavedConversations" ), }, - handler: async (ctx, args) => { - return await ctx.db.get(args.conversationId); + handler: async ( ctx, args ) => { + return await ctx.db.get( args.conversationId ); }, -}); +} ); /** * Get user conversations */ -export const getUserConversations = query({ +export const getUserConversations = query( { args: { - limit: v.optional(v.number()), + limit: v.optional( v.number() ), }, - handler: async (ctx, args) => { - const userId = await getAuthUserId(ctx); - if (!userId) { + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { return []; } const conversations = await ctx.db - .query("interleavedConversations") - .withIndex("by_user", (q) => q.eq("userId", userId)) - .order("desc") - .take(args.limit || 20); + .query( "interleavedConversations" ) + .withIndex( "by_user", ( q ) => q.eq( "userId", userId ) ) + .order( "desc" ) + .take( args.limit || 20 ); return conversations; }, -}); +} ); /** * Get conversation (public - for frontend) * Now fetches messages from interleavedMessages table */ -export const getConversation = query({ +export const getConversation = query( { args: { - conversationId: v.id("interleavedConversations"), - conversationToken: v.optional(v.string()), + conversationId: v.id( "interleavedConversations" ), + conversationToken: v.optional( v.string() ), }, - handler: async (ctx, args) => { - const userId = await getAuthUserId(ctx); - const conversation = await ctx.db.get(args.conversationId); + handler: async ( ctx, args ) => { + const userId = await getAuthUserId( ctx ); + const conversation = await ctx.db.get( args.conversationId ); - if (!conversation) { + if ( !conversation ) { return null; } // Check access: either user owns it or has the token - const hasAccess = (userId && conversation.userId === userId) || - (args.conversationToken && conversation.conversationToken === args.conversationToken); + const hasAccess = ( userId && conversation.userId === userId ) || + ( args.conversationToken && conversation.conversationToken === args.conversationToken ); - if (!hasAccess) { + if ( !hasAccess ) { return null; } // Fetch all messages for this conversation (reactive query) const messages = await ctx.db - .query("interleavedMessages") - .withIndex("by_conversation", (q) => q.eq("conversationId", args.conversationId)) + .query( "interleavedMessages" ) + .withIndex( "by_conversation", ( q ) => q.eq( "conversationId", args.conversationId ) ) .collect(); // Sort by sequence number - const sortedMessages = messages.sort((a, b) => a.sequenceNumber - b.sequenceNumber); + const sortedMessages = messages.sort( ( a, b ) => a.sequenceNumber - b.sequenceNumber ); // Return conversation with messages (for compatibility with existing UI) return { ...conversation, - messages: sortedMessages.map((m) => ({ + messages: sortedMessages.map( ( m ) => ( { role: m.role, content: m.content, reasoning: m.reasoning, toolCalls: m.toolCalls, timestamp: m.timestamp, - })), + } ) ), }; }, -}); +} ); diff --git a/convex/lib/bedrockGate.ts b/convex/lib/bedrockGate.ts new file mode 100644 index 0000000..80b52ab --- /dev/null +++ b/convex/lib/bedrockGate.ts @@ -0,0 +1,218 @@ +/** + * Centralized Bedrock Access Gate + * + * Single module that enforces tier-based access to Bedrock models. + * Call requireBedrockAccess() or requireBedrockAccessForUser() BEFORE + * any Bedrock API invocation. + * + * ALL Bedrock gating decisions go through this module. + * Do NOT add scattered tier checks in individual action files. + * + * Imports tier logic from tierConfig.ts (the single source of truth for + * tier limits and allowed providers/models). + */ + +import { getAuthUserId } from "@convex-dev/auth/server"; +import { + isProviderAllowedForTier, + isBedrockModelAllowedForTier, + checkExecutionLimit, + getUpgradeMessage, + getTierConfig, + type TierName, +} from "./tierConfig"; + +// ─── Result Types ──────────────────────────────────────────────────────────── + +export interface BedrockAccessGranted { + allowed: true; + userId: string; + tier: TierName; +} + +export interface BedrockAccessDenied { + allowed: false; + reason: string; + upgradeMessage: string; +} + +export type BedrockGateResult = BedrockAccessGranted | BedrockAccessDenied; + +// ─── Variant A: JWT-authenticated callers ──────────────────────────────────── + +/** + * Gate for action handlers where the CALLER is the authenticated user. + * + * 1. Validates JWT via getAuthUserId(ctx) (Convex handles JWT verification) + * 2. Looks up user record to read tier + * 3. Checks provider access, model family, and execution limits + * + * @param ctx Convex action/query/mutation context (must have ctx.auth) + * @param modelId The Bedrock model ID being requested (optional — skips + * model-family check if omitted) + * @param userLookup Function that fetches a user document by ID. + * Typically: `(args) => ctx.runQuery(internal.users.getInternal, args)` + */ +export async function requireBedrockAccess( + ctx: any, + modelId: string | undefined, + userLookup: ( args: { id: any } ) => Promise, +): Promise { + // 1. Authenticate via JWT + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + return { + allowed: false, + reason: "Authentication required to use cloud AI models. Please sign in.", + upgradeMessage: "Sign in to continue.", + }; + } + + // 2. Look up user record + const user = await userLookup( { id: userId } ); + if ( !user ) { + return { + allowed: false, + reason: "User record not found. Please sign in again.", + upgradeMessage: "Sign in to continue.", + }; + } + + // 3. Delegate to the user-doc variant + const result = await requireBedrockAccessForUser( user, modelId ); + if ( result.allowed ) { + // Override userId with the real authenticated user ID + return { ...result, userId: String( userId ) }; + } + return result; +} + +// ─── Variant B: Internal callers with a pre-fetched user doc ───────────────── + +/** + * Gate for internal actions that already have the user document + * (e.g., looked up via agent.createdBy). + * + * Same tier/model/limit checks, also blocks anonymous users. + * + * @param userDoc User document from the users table (or null) + * @param modelId Optional Bedrock model ID for family-level gating + */ +export async function requireBedrockAccessForUser( + userDoc: { + _id?: any; + tier?: string; + executionsThisMonth?: number; + isAnonymous?: boolean; + subscriptionStatus?: string; + currentPeriodEnd?: number; + } | null, + modelId?: string, +): Promise { + if ( !userDoc ) { + return { + allowed: false, + reason: "User record not found.", + upgradeMessage: "Please sign in again.", + }; + } + + // Block anonymous users from Bedrock + if ( userDoc.isAnonymous ) { + return { + allowed: false, + reason: "Anonymous users cannot access cloud AI models. Please create an account.", + upgradeMessage: "Create an account to access Bedrock models.", + }; + } + + const tier = ( userDoc.tier || "freemium" ) as TierName; + const config = getTierConfig( tier ); + + // Payment verification: block paid-tier users with failed/disputed payments + if ( tier === "personal" || tier === "enterprise" ) { + const subStatus = userDoc.subscriptionStatus; + + if ( subStatus === "past_due" ) { + return { + allowed: false, + reason: + "Your payment has failed. Please update your payment method in Settings → Billing to continue using cloud AI models.", + upgradeMessage: "Update payment method to continue.", + }; + } + + if ( subStatus === "disputed" ) { + return { + allowed: false, + reason: + "Your account is restricted due to a payment dispute. Please contact support to resolve this.", + upgradeMessage: "Contact support to restore access.", + }; + } + + if ( subStatus === "canceled" ) { + return { + allowed: false, + reason: + "Your subscription has been canceled. Resubscribe in Settings → Billing to access cloud AI models.", + upgradeMessage: "Resubscribe to continue.", + }; + } + + // Check if subscription period has expired (grace period: 3 days past period end) + const GRACE_PERIOD_MS = 3 * 24 * 60 * 60 * 1000; // 3 days + if ( + userDoc.currentPeriodEnd && + Date.now() > ( userDoc.currentPeriodEnd * 1000 ) + GRACE_PERIOD_MS + ) { + return { + allowed: false, + reason: + "Your subscription period has expired. Please renew in Settings → Billing.", + upgradeMessage: "Renew your subscription to continue.", + }; + } + } + + // Provider-level check: does this tier allow Bedrock at all? + if ( !isProviderAllowedForTier( tier, "bedrock" ) ) { + return { + allowed: false, + reason: + "Bedrock models require a Personal subscription ($5/month). " + + "Use local Ollama models for free, or upgrade in Settings \u2192 Billing.", + upgradeMessage: getUpgradeMessage( tier ), + }; + } + + // Model-family check: is this specific model allowed on this tier? + if ( modelId && !isBedrockModelAllowedForTier( tier, modelId ) ) { + return { + allowed: false, + reason: `Model ${modelId} is not available on the ${config.displayName} tier.`, + upgradeMessage: getUpgradeMessage( tier ), + }; + } + + // Execution limit check + const execCount = userDoc.executionsThisMonth || 0; + const limitResult = checkExecutionLimit( tier, execCount ); + if ( !limitResult.allowed ) { + return { + allowed: false, + reason: `Monthly unit limit reached (${execCount} used). ${ + limitResult.overageAllowed + ? "Overage billing applies at $0.05/unit." + : "Upgrade your plan for more capacity." + }`, + upgradeMessage: getUpgradeMessage( tier ), + }; + } + + return { + allowed: true, + userId: String( userDoc._id || "internal" ), + tier, + }; +} diff --git a/convex/lib/dynamicModelSwitching.ts b/convex/lib/dynamicModelSwitching.ts new file mode 100644 index 0000000..6a70d0a --- /dev/null +++ b/convex/lib/dynamicModelSwitching.ts @@ -0,0 +1,340 @@ +/** + * Dynamic Model Switching for StrandsAgents + * + * Automatically switches between models based on conversation complexity: + * - Simple queries → Haiku (fast, cheap) + * - Complex reasoning → Sonnet (slower, more capable) + * - Multi-step tasks → Opus (slowest, most capable) + * + * This wraps the model call in the agent decorator to enable intelligent routing. + */ + +import type { Doc } from "../_generated/dataModel"; + +type AgentDoc = Doc<"agents">; + +/** + * Model tier configuration + */ +export interface ModelTier { + name: string; + modelId: string; + costPer1KInput: number; // USD + costPer1KOutput: number; // USD + maxTokens: number; + speedRating: 1 | 2 | 3; // 1 = fast, 3 = slow + capabilityRating: 1 | 2 | 3; // 1 = basic, 3 = advanced +} + +/** + * Available model tiers + */ +export const MODEL_TIERS: Record = { + // Fast & Cheap + haiku: { + name: "Claude 4.5 Haiku", + modelId: "anthropic.claude-haiku-4-5-20251001-v1:0", + costPer1KInput: 0.001, + costPer1KOutput: 0.005, + maxTokens: 8000, + speedRating: 1, + capabilityRating: 1, + }, + + // Balanced + sonnet: { + name: "Claude 3.5 Sonnet", + modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0", + costPer1KInput: 0.003, + costPer1KOutput: 0.015, + maxTokens: 8000, + speedRating: 2, + capabilityRating: 2, + }, + + // Capable (highest auto-selectable tier — Opus is too expensive for auto-selection) + sonnet45: { + name: "Claude Sonnet 4.5", + modelId: "anthropic.claude-sonnet-4-5-20250929-v1:0", + costPer1KInput: 0.003, + costPer1KOutput: 0.015, + maxTokens: 8192, + speedRating: 2, + capabilityRating: 3, + }, +}; + +/** + * Complexity signals detected in messages + */ +export interface ComplexitySignals { + // Message characteristics + messageLength: number; + hasCodeBlocks: boolean; + hasMultipleQuestions: boolean; + hasMath: boolean; + hasLogicalReasoning: boolean; + + // Conversation characteristics + conversationLength: number; + toolCallsInHistory: number; + failedAttempts: number; + + // Explicit indicators + userRequestedThinking: boolean; + userRequestedStepByStep: boolean; + previousModelFailed: boolean; +} + +/** + * Analyze conversation complexity + */ +export function analyzeComplexity( + message: string, + conversationHistory: Array<{ role: string; content: string }> = [] +): ComplexitySignals { + const lowerMessage = message.toLowerCase(); + + // Message characteristics + const messageLength = message.length; + const hasCodeBlocks = message.includes( "```" ) || message.includes( "`" ); + const hasMultipleQuestions = ( message.match( /\?/g ) || [] ).length > 1; + const hasMath = /\d+[\+\-\*\/]\d+|equation|formula|calculate/.test( lowerMessage ); + const hasLogicalReasoning = /because|therefore|if.*then|prove|explain why|analyze|compare/.test( lowerMessage ); + + // Conversation characteristics + const conversationLength = conversationHistory.length; + const toolCallsInHistory = conversationHistory.filter( ( msg ) => + msg.role === "assistant" && msg.content.includes( "tool_use" ) + ).length; + const failedAttempts = conversationHistory.filter( ( msg ) => + msg.role === "assistant" && ( msg.content.includes( "error" ) || msg.content.includes( "failed" ) ) + ).length; + + // Explicit indicators + const userRequestedThinking = /think step by step|reason through|explain your thinking|show your work/.test( + lowerMessage + ); + const userRequestedStepByStep = /step by step|break it down|detailed explanation/.test( lowerMessage ); + const previousModelFailed = failedAttempts > 0; + + return { + messageLength, + hasCodeBlocks, + hasMultipleQuestions, + hasMath, + hasLogicalReasoning, + conversationLength, + toolCallsInHistory, + failedAttempts, + userRequestedThinking, + userRequestedStepByStep, + previousModelFailed, + }; +} + +/** + * Calculate complexity score (0-100) + */ +export function calculateComplexityScore( signals: ComplexitySignals ): number { + let score = 0; + + // Message length (0-20 points) + if ( signals.messageLength > 1000 ) score += 20; + else if ( signals.messageLength > 500 ) score += 15; + else if ( signals.messageLength > 200 ) score += 10; + else score += 5; + + // Code blocks (10 points) + if ( signals.hasCodeBlocks ) score += 10; + + // Multiple questions (10 points) + if ( signals.hasMultipleQuestions ) score += 10; + + // Math (5 points) + if ( signals.hasMath ) score += 5; + + // Logical reasoning (15 points) + if ( signals.hasLogicalReasoning ) score += 15; + + // Conversation length (0-15 points) + if ( signals.conversationLength > 10 ) score += 15; + else if ( signals.conversationLength > 5 ) score += 10; + else if ( signals.conversationLength > 2 ) score += 5; + + // Tool calls (10 points) + if ( signals.toolCallsInHistory > 2 ) score += 10; + else if ( signals.toolCallsInHistory > 0 ) score += 5; + + // Failed attempts (15 points - escalate to better model) + if ( signals.failedAttempts > 1 ) score += 15; + else if ( signals.failedAttempts > 0 ) score += 10; + + // Explicit indicators (20 points each) + if ( signals.userRequestedThinking ) score += 20; + if ( signals.userRequestedStepByStep ) score += 20; + if ( signals.previousModelFailed ) score += 20; + + return Math.min( score, 100 ); +} + +/** + * Select optimal model based on complexity + */ +export function selectModel( + complexityScore: number, + _agent: AgentDoc, + options: { + preferCost?: boolean; // Prefer cheaper models + preferSpeed?: boolean; // Prefer faster models + preferCapability?: boolean; // Prefer more capable models + userTier?: "freemium" | "personal" | "enterprise"; + } = {} +): ModelTier { + const { preferCost = false, preferSpeed = false, preferCapability = false, userTier = "freemium" } = options; + + // All tiers are Bedrock models — Ollama agents use their own execution path + // and should not go through dynamic model switching. + const availableModels = [MODEL_TIERS.haiku, MODEL_TIERS.sonnet, MODEL_TIERS.sonnet45]; + + // Freemium users are limited to free models. For now we return the fast + // Haiku tier (`MODEL_TIERS.haiku`) as the informational selection. Note + // that actual Bedrock execution for freemium users is gated elsewhere via + // tier checks (see `isProviderAllowedForTier`). + if ( userTier === "freemium" ) { + return MODEL_TIERS.haiku; + } + + // Complexity-based routing with preference adjustments + let candidateIndex: number; + if ( complexityScore < 30 ) { + candidateIndex = 0; // Simple → Haiku + } else if ( complexityScore < 60 ) { + candidateIndex = 1; // Moderate → Sonnet + } else { + candidateIndex = 2; // High → Opus-tier + } + + // Apply preference bias (at most ±1 adjustment to avoid overshooting) + const costOrSpeedBias = ( preferCost || preferSpeed ) ? -1 : 0; + const capabilityBias = preferCapability ? 1 : 0; + candidateIndex = Math.max( 0, Math.min( availableModels.length - 1, candidateIndex + costOrSpeedBias + capabilityBias ) ); + + return availableModels[Math.min( candidateIndex, availableModels.length - 1 )]; +} + +/** + * Model switching decision with explanation + */ +export interface ModelSwitchDecision { + selectedModel: ModelTier; + complexityScore: number; + reasoning: string; + estimatedCost: number; + signals: ComplexitySignals; +} + +/** + * Make model switching decision + */ +export function decideModelSwitch( + message: string, + conversationHistory: Array<{ role: string; content: string }>, + agent: AgentDoc, + options: { + preferCost?: boolean; + preferSpeed?: boolean; + preferCapability?: boolean; + userTier?: "freemium" | "personal" | "enterprise"; + } = {} +): ModelSwitchDecision { + // Analyze complexity + const signals = analyzeComplexity( message, conversationHistory ); + const complexityScore = calculateComplexityScore( signals ); + + // Select model + const selectedModel = selectModel( complexityScore, agent, options ); + + // Calculate estimated cost (assuming ~500 tokens input, ~500 tokens output) + const estimatedCost = + ( 500 / 1000 ) * selectedModel.costPer1KInput + ( 500 / 1000 ) * selectedModel.costPer1KOutput; + + // Generate reasoning + let reasoning = `Complexity score: ${complexityScore}/100. `; + + if ( complexityScore < 30 ) { + reasoning += "Simple query detected. Using fast, cost-effective model."; + } else if ( complexityScore < 60 ) { + reasoning += "Moderate complexity detected. Using balanced model."; + } else { + reasoning += "High complexity detected. Using most capable model."; + } + + if ( signals.userRequestedThinking ) { + reasoning += " User requested step-by-step thinking."; + } + + if ( signals.previousModelFailed ) { + reasoning += " Escalating due to previous failure."; + } + + return { + selectedModel, + complexityScore, + reasoning, + estimatedCost, + signals, + }; +} + +/** + * Middleware wrapper for StrandsAgents model calls + * + * This function wraps the original model call and adds dynamic model switching. + * + * Usage in agent decorator: + * ```python + * from bedrock_agentcore_starter_toolkit import Agent, AgentOptions + * + * @app.entrypoint + * def run(): + * agent = Agent( + * agent_name="my_agent", + * model_name=select_dynamic_model(context), # ← Dynamic switching here + * system_prompt="...", + * tools=[...], + * ) + * ``` + */ +export function createModelSwitchingWrapper( + originalModelCall: ( modelId: string, ...args: any[] ) => Promise +) { + return async function switchingModelCall( + message: string, + conversationHistory: Array<{ role: string; content: string }>, + agent: AgentDoc, + defaultModelId: string, + options: { + preferCost?: boolean; + preferSpeed?: boolean; + preferCapability?: boolean; + userTier?: "freemium" | "personal" | "enterprise"; + } = {}, + ...extraArgs: any[] + ): Promise<{ response: any; decision: ModelSwitchDecision }> { + // Make decision + const decision = decideModelSwitch( message, conversationHistory, agent, options ); + + console.log( `[ModelSwitcher] ${decision.reasoning}` ); + console.log( `[ModelSwitcher] Selected: ${decision.selectedModel.name}` ); + console.log( `[ModelSwitcher] Estimated cost: $${decision.estimatedCost.toFixed( 4 )}` ); + + // Call original model with selected model + const response = await originalModelCall( decision.selectedModel.modelId, ...extraArgs ); + + return { + response, + decision, + }; + }; +} diff --git a/convex/lib/memoryStore.ts b/convex/lib/memoryStore.ts new file mode 100644 index 0000000..3b6a39e --- /dev/null +++ b/convex/lib/memoryStore.ts @@ -0,0 +1,130 @@ +/** + * Internal mutations/queries for the toolMemory table. + * + * Used by the memory tool actions (shortTermMemory, longTermMemory, semanticMemory) + * in convex/tools.ts to persist data in the Convex database instead of returning + * mock/placeholder values. + */ + +import { internalMutation, internalQuery } from "../_generated/server"; +import { v } from "convex/values"; + +/** Store (upsert) a memory entry. */ +export const store = internalMutation({ + args: { + userId: v.string(), + memoryType: v.string(), + key: v.string(), + value: v.string(), + metadata: v.optional(v.string()), + ttl: v.optional(v.number()), + }, + handler: async (ctx, args) => { + const now = Date.now(); + + // Check for existing entry to upsert + const existing = await ctx.db + .query("toolMemory") + .withIndex("by_key", (q) => + q.eq("userId", args.userId).eq("memoryType", args.memoryType).eq("key", args.key) + ) + .first(); + + if (existing) { + await ctx.db.patch(existing._id, { + value: args.value, + metadata: args.metadata, + ttl: args.ttl, + updatedAt: now, + }); + return existing._id; + } + + return await ctx.db.insert("toolMemory", { + userId: args.userId, + memoryType: args.memoryType, + key: args.key, + value: args.value, + metadata: args.metadata, + ttl: args.ttl, + createdAt: now, + updatedAt: now, + }); + }, +}); + +/** Retrieve a single memory entry by key. */ +export const retrieve = internalQuery({ + args: { + userId: v.string(), + memoryType: v.string(), + key: v.string(), + }, + handler: async (ctx, args) => { + const entry = await ctx.db + .query("toolMemory") + .withIndex("by_key", (q) => + q.eq("userId", args.userId).eq("memoryType", args.memoryType).eq("key", args.key) + ) + .first(); + + if (!entry) return null; + + // Check TTL expiration + if (entry.ttl && Date.now() - entry.updatedAt > entry.ttl * 1000) { + return null; // Expired + } + + return entry; + }, +}); + +/** Search memory entries by type (returns most recent entries). */ +export const search = internalQuery({ + args: { + userId: v.string(), + memoryType: v.string(), + maxItems: v.optional(v.number()), + }, + handler: async (ctx, args) => { + const limit = args.maxItems || 20; + + // Over-fetch to account for expired entries that will be filtered out + const entries = await ctx.db + .query("toolMemory") + .withIndex("by_type", (q) => + q.eq("userId", args.userId).eq("memoryType", args.memoryType) + ) + .order("desc") + .take(limit * 3); + + // Filter expired entries, then limit to requested count + const now = Date.now(); + return entries + .filter((entry) => !entry.ttl || now - entry.updatedAt <= entry.ttl * 1000) + .slice(0, limit); + }, +}); + +/** Delete a memory entry by key. */ +export const remove = internalMutation({ + args: { + userId: v.string(), + memoryType: v.string(), + key: v.string(), + }, + handler: async (ctx, args) => { + const entry = await ctx.db + .query("toolMemory") + .withIndex("by_key", (q) => + q.eq("userId", args.userId).eq("memoryType", args.memoryType).eq("key", args.key) + ) + .first(); + + if (entry) { + await ctx.db.delete(entry._id); + return true; + } + return false; + }, +}); diff --git a/convex/lib/messageExecutor.ts b/convex/lib/messageExecutor.ts new file mode 100644 index 0000000..36a2faa --- /dev/null +++ b/convex/lib/messageExecutor.ts @@ -0,0 +1,84 @@ +/** + * Server-side message executor - handles actual API calls to Bedrock and Ollama. + * + * This module MUST live under convex/ because it imports the AWS SDK and + * accesses environment variables that are only available in the Node.js runtime. + * + * The pure composition logic (composeWorkflow) stays in src/engine/messageComposer.ts + * so the frontend can use it for previews without pulling in AWS dependencies. + */ + +import type { ComposedMessages } from "../../src/engine/messageComposer"; +import type { TokenUsage } from "./tokenBilling"; + +export async function executeComposedMessages( + composed: ComposedMessages +): Promise<{ text: string; raw: any; tokenUsage?: TokenUsage }> { + if (composed.kind === "tool-only") { + return { + text: "", + raw: { kind: "tool-only" }, + }; + } + + if (composed.kind === "bedrock" && composed.bedrock) { + const { BedrockRuntimeClient, ConverseCommand } = await import( + "@aws-sdk/client-bedrock-runtime" + ); + const accessKeyId = process.env.AWS_ACCESS_KEY_ID; + const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY; + if ((accessKeyId && !secretAccessKey) || (secretAccessKey && !accessKeyId)) { + throw new Error("AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY must both be set or both be unset"); + } + const client = new BedrockRuntimeClient({ + region: process.env.AWS_REGION || process.env.BEDROCK_REGION || "us-east-1", + credentials: accessKeyId && secretAccessKey + ? { accessKeyId, secretAccessKey } + : undefined, + }); + + const response = await client.send( + new ConverseCommand({ + modelId: composed.bedrock.modelId, + messages: composed.bedrock.messages as any, + inferenceConfig: composed.bedrock.inferenceConfig, + }) + ); + + const text = + response.output?.message?.content + ?.map((content: any) => ("text" in content ? content.text : "")) + .join("") ?? ""; + + // Extract token usage from ConverseCommand response (standardized by AWS SDK) + const { extractTokenUsage } = await import( "./tokenBilling" ); + const tokenUsage = response.usage + ? extractTokenUsage( response.usage, composed.bedrock.modelId ) + : undefined; + + return { text, raw: response, tokenUsage }; + } + + if (composed.kind === "ollama" && composed.ollama) { + const response = await fetch(`${composed.ollama.endpoint}/api/chat`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + model: composed.ollama.model, + messages: composed.ollama.messages, + stream: false, + }), + }); + + if (!response.ok) { + throw new Error(`Ollama request failed: ${response.status}`); + } + + const json = await response.json(); + const text = json.message?.content ?? ""; + + return { text, raw: json }; + } + + throw new Error(`Unsupported composition kind: ${composed.kind}`); +} diff --git a/convex/lib/roles.ts b/convex/lib/roles.ts new file mode 100644 index 0000000..e60a776 --- /dev/null +++ b/convex/lib/roles.ts @@ -0,0 +1,19 @@ +import { UserRole } from "../users"; + +export const ADMIN_ROLES: UserRole[] = [UserRole.ADMIN]; +export const MODERATOR_ROLES: UserRole[] = [UserRole.ADMIN]; + +export function isAdmin( role: UserRole ): boolean { + return role === UserRole.ADMIN; +} + +export function hasPermission( userRole: UserRole, requiredRole: UserRole ): boolean { + const roleHierarchy: Record = { + [UserRole.GUEST]: 0, + [UserRole.USER]: 1, + [UserRole.PAID]: 2, + [UserRole.ENTERPRISE]: 3, + [UserRole.ADMIN]: 4, // Platform operator — always highest + }; + return roleHierarchy[userRole] >= roleHierarchy[requiredRole]; +} diff --git a/convex/lib/stateValidation.ts b/convex/lib/stateValidation.ts deleted file mode 100644 index c881422..0000000 --- a/convex/lib/stateValidation.ts +++ /dev/null @@ -1,150 +0,0 @@ -/** - * Test Execution State Machine Validator - * - * Enforces valid state transitions for testExecutions to maintain data integrity - */ - -export type TestStatus = - | "CREATED" - | "QUEUED" - | "BUILDING" - | "RUNNING" - | "COMPLETED" - | "FAILED" - | "ABANDONED" - | "ARCHIVED"; - -/** - * Valid state transitions - */ -const VALID_TRANSITIONS: Record = { - CREATED: ["QUEUED"], - QUEUED: ["BUILDING", "ABANDONED"], - BUILDING: ["RUNNING", "FAILED"], - RUNNING: ["COMPLETED", "FAILED"], - COMPLETED: ["ARCHIVED"], - FAILED: ["ARCHIVED"], - ABANDONED: ["QUEUED", "ARCHIVED"], // Can retry if attempts < 3 - ARCHIVED: [], // Terminal state -}; - -/** - * Validate if a state transition is allowed - */ -export function isValidTransition( - currentStatus: TestStatus, - newStatus: TestStatus -): boolean { - const allowedTransitions = VALID_TRANSITIONS[currentStatus]; - return allowedTransitions.includes(newStatus); -} - -/** - * Enforce state transition (throws error if invalid) - */ -export function enforceTransition( - currentStatus: TestStatus, - newStatus: TestStatus -): void { - if (!isValidTransition(currentStatus, newStatus)) { - throw new Error( - `Invalid state transition: ${currentStatus} → ${newStatus}. ` + - `Allowed transitions from ${currentStatus}: ${VALID_TRANSITIONS[currentStatus].join(", ")}` - ); - } -} - -/** - * Get allowed next states for current status - */ -export function getAllowedTransitions(currentStatus: TestStatus): TestStatus[] { - return VALID_TRANSITIONS[currentStatus]; -} - -/** - * Check if a status is terminal (no further transitions possible) - */ -export function isTerminalStatus(status: TestStatus): boolean { - return VALID_TRANSITIONS[status].length === 0; -} - -/** - * Check if a status indicates test is in progress - */ -export function isInProgress(status: TestStatus): boolean { - return ["QUEUED", "BUILDING", "RUNNING"].includes(status); -} - -/** - * Check if a status indicates test is complete (success or failure) - */ -export function isComplete(status: TestStatus): boolean { - return ["COMPLETED", "FAILED", "ABANDONED", "ARCHIVED"].includes(status); -} - -/** - * Get phase from status - */ -export function getPhaseFromStatus(status: TestStatus): string { - switch (status) { - case "CREATED": - case "QUEUED": - return "queued"; - case "BUILDING": - return "building"; - case "RUNNING": - return "running"; - case "COMPLETED": - case "FAILED": - case "ABANDONED": - case "ARCHIVED": - return "completed"; - default: - return "unknown"; - } -} - -/** - * Validate test query input - */ -export function validateTestQuery(query: string): void { - if (!query || query.trim().length === 0) { - throw new Error("Test query cannot be empty"); - } - if (query.length > 2000) { - throw new Error("Test query must be 2000 characters or less"); - } - if (query.includes("\0")) { - throw new Error("Test query cannot contain null bytes"); - } -} - -/** - * Validate timeout value - */ -export function validateTimeout(timeout: number): void { - if (timeout < 10000) { - throw new Error("Timeout must be at least 10 seconds (10000ms)"); - } - if (timeout > 600000) { - throw new Error("Timeout cannot exceed 10 minutes (600000ms)"); - } -} - -/** - * Validate priority value - */ -export function validatePriority(priority: number): void { - if (![1, 2, 3].includes(priority)) { - throw new Error("Priority must be 1 (high), 2 (normal), or 3 (low)"); - } -} - -/** - * Validate model provider - */ -export function validateModelProvider(provider: string): void { - if (!["ollama", "bedrock"].includes(provider)) { - throw new Error("Model provider must be 'ollama' or 'bedrock'"); - } -} diff --git a/convex/lib/strandsTools.ts b/convex/lib/strandsTools.ts new file mode 100644 index 0000000..dc5d1b3 --- /dev/null +++ b/convex/lib/strandsTools.ts @@ -0,0 +1,23 @@ +import { STRANDS_TOOLS_REGISTRY, type StrandsToolMetadata } from "../toolRegistry"; + +export function normalizeToolName(name: string): string { + return name + .toLowerCase() + .replace(/[^a-z0-9]+/g, "_") + .replace(/^_+|_+$/g, ""); +} + +export function findToolMetadata(name: string): StrandsToolMetadata | undefined { + const normalized = normalizeToolName(name); + + if (STRANDS_TOOLS_REGISTRY[normalized]) { + return STRANDS_TOOLS_REGISTRY[normalized]; + } + + return Object.values(STRANDS_TOOLS_REGISTRY).find((tool) => { + return ( + normalizeToolName(tool.name) === normalized || + normalizeToolName(tool.displayName) === normalized + ); + }); +} diff --git a/convex/lib/tierConfig.ts b/convex/lib/tierConfig.ts new file mode 100644 index 0000000..2f73521 --- /dev/null +++ b/convex/lib/tierConfig.ts @@ -0,0 +1,246 @@ +/** + * Tier Configuration - Single Source of Truth + * + * Consolidates tier limits previously scattered across: + * - convex/apiKeys.ts (getTierLimits) + * - convex/agentcoreSetup.ts (inline limits) + * - convex/testExecution.ts (FREE_TESTS_PER_MONTH constants) + * - convex/agentcoreTestExecution.ts (inline limits) + * + * ALL tier-related constants must be imported from this file. + * Do NOT define tier limits anywhere else. + */ + +import { UserRole } from "../users"; + +// ─── Types ─────────────────────────────────────────────────────────────────── + +export type TierName = "freemium" | "personal" | "enterprise"; + +export type AllowedProvider = "ollama" | "lmstudio" | "bedrock"; + +export interface TierConfig { + /** Display name for UI */ + displayName: string; + /** Monthly price in USD (0 for free) */ + monthlyPriceUsd: number; + /** Max cloud executions per billing period (-1 = unlimited) */ + monthlyExecutions: number; + /** Max agents a user can create */ + maxAgents: number; + /** Max concurrent test executions */ + maxConcurrentTests: number; + /** Cost per execution past monthly limit (0 = no overage, hard stop) */ + overageCostPerExecution: number; + /** Whether overage is allowed (personal = yes, others = no) */ + allowOverage: boolean; + /** Model providers accessible at this tier */ + allowedProviders: AllowedProvider[]; + /** Bedrock model families accessible (empty = none) */ + allowedBedrockFamilies: string[]; + /** Feature flags */ + features: { + bedrockAccess: boolean; + customDeployment: boolean; + prioritySupport: boolean; + advancedAnalytics: boolean; + }; +} + +// ─── Configuration ─────────────────────────────────────────────────────────── + +export const TIER_CONFIGS: Record = { + freemium: { + displayName: "Free", + monthlyPriceUsd: 0, + monthlyExecutions: 50, + maxAgents: 5, + maxConcurrentTests: 1, + overageCostPerExecution: 0, + allowOverage: false, + allowedProviders: ["ollama", "lmstudio"], + allowedBedrockFamilies: [], + features: { + bedrockAccess: false, + customDeployment: false, + prioritySupport: false, + advancedAnalytics: false, + }, + }, + personal: { + displayName: "Personal", + monthlyPriceUsd: 5, + monthlyExecutions: 100, + maxAgents: 50, + maxConcurrentTests: 5, + overageCostPerExecution: 0.05, + allowOverage: true, + allowedProviders: ["ollama", "lmstudio", "bedrock"], + allowedBedrockFamilies: [ + "claude-haiku", + "claude-sonnet", + "claude-opus", + "amazon-nova", + "cohere", + "mistral", + "deepseek", + "moonshot", + ], + features: { + bedrockAccess: true, + customDeployment: true, + prioritySupport: false, + advancedAnalytics: false, + }, + }, + enterprise: { + displayName: "Enterprise", + monthlyPriceUsd: -1, // Contact us + monthlyExecutions: -1, // Unlimited + maxAgents: 500, + maxConcurrentTests: 20, + overageCostPerExecution: 0, + allowOverage: false, + allowedProviders: ["ollama", "lmstudio", "bedrock"], + allowedBedrockFamilies: ["*"], // All models + features: { + bedrockAccess: true, + customDeployment: true, + prioritySupport: true, + advancedAnalytics: true, + }, + }, +}; + +// ─── Helper Functions ──────────────────────────────────────────────────────── + +/** + * Get the tier configuration for a given tier name. + * Falls back to freemium if tier is unknown. + */ +export function getTierConfig( tier: string | undefined | null ): TierConfig { + const name = ( tier ?? "freemium" ) as TierName; + return TIER_CONFIGS[name] ?? TIER_CONFIGS.freemium; +} + +/** + * Check if a model provider is allowed for a given tier. + */ +export function isProviderAllowedForTier( + tier: string | undefined | null, + provider: string +): boolean { + const config = getTierConfig( tier ); + return config.allowedProviders.includes( provider as AllowedProvider ); +} + +/** + * Check if a specific Bedrock model family is allowed for a tier. + * Matches against the model ID string (e.g., "claude-haiku" matches "anthropic.claude-haiku-4-5-...") + */ +export function isBedrockModelAllowedForTier( + tier: string | undefined | null, + modelId: string +): boolean { + const config = getTierConfig( tier ); + if ( !config.features.bedrockAccess ) { + return false; + } + // Enterprise gets all models + if ( config.allowedBedrockFamilies.includes( "*" ) ) { + return true; + } + // Check if model ID contains any allowed family + const lowerModelId = modelId.toLowerCase(); + return config.allowedBedrockFamilies.some( + ( family ) => lowerModelId.includes( family.toLowerCase() ) + ); +} + +/** + * Check if the user is within their execution limit. + * Returns { allowed, remaining, overageAllowed } + */ +export function checkExecutionLimit( + tier: string | undefined | null, + currentCount: number +): { allowed: boolean; remaining: number; overageAllowed: boolean } { + const config = getTierConfig( tier ); + + // Unlimited tier + if ( config.monthlyExecutions === -1 ) { + return { allowed: true, remaining: -1, overageAllowed: false }; + } + + const remaining = config.monthlyExecutions - currentCount; + + // Within limit + if ( remaining > 0 ) { + return { allowed: true, remaining, overageAllowed: config.allowOverage }; + } + + // Past limit - check if overage is allowed + if ( config.allowOverage ) { + return { allowed: true, remaining: 0, overageAllowed: true }; + } + + return { allowed: false, remaining: 0, overageAllowed: false }; +} + +/** + * Map a UserRole to the corresponding tier name. + * Used to reconcile the role-based permission system with the tier billing system. + */ +export function getTierForRole( role: string | undefined | null ): TierName { + switch ( role ) { + case UserRole.PAID: + // Both personal and enterprise subscribers get PAID role. + // The actual tier is stored on the user record's "tier" field. + // This returns "personal" as the minimum tier for any paid user. + return "personal"; + case UserRole.ENTERPRISE: + return "enterprise"; + case UserRole.ADMIN: + // Platform operator gets enterprise-level access + return "enterprise"; + case UserRole.USER: + case UserRole.GUEST: + default: + return "freemium"; + } +} + +/** + * Get the UserRole that should be assigned when subscribing to a tier. + * + * ADMIN role is reserved for platform operators only — never auto-assigned + * through billing. Enterprise subscribers get the ENTERPRISE role, which + * has higher permissions than PAID but is NOT admin. + */ +export function getRoleForTier( tier: TierName ): string { + switch ( tier ) { + case "personal": + return UserRole.PAID; + case "enterprise": + return UserRole.ENTERPRISE; + case "freemium": + default: + return UserRole.USER; + } +} + +/** + * Format the tier limits as a human-readable error message for upgrade prompts. + */ +export function getUpgradeMessage( tier: string | undefined | null ): string { + if ( tier === "enterprise" ) { + return "Enterprise tier — contact support if you need higher limits."; + } + const config = getTierConfig( tier ); + const nextTier = tier === "freemium" ? "Personal" : "Enterprise"; + const price = tier === "freemium" ? "$5/month" : "Contact us"; + return ( + `${config.displayName} tier limit reached. ` + + `Upgrade to ${nextTier} (${price}) for more capacity!` + ); +} diff --git a/convex/lib/tokenBilling.ts b/convex/lib/tokenBilling.ts new file mode 100644 index 0000000..89f5c00 --- /dev/null +++ b/convex/lib/tokenBilling.ts @@ -0,0 +1,228 @@ +/** + * Token-Based Billing — Single Source of Truth + * + * Extracts token usage from Bedrock responses (all providers), + * calculates billing units using 2x markup over AWS cost. + * + * Formula: + * awsCost = (inputTokens × inputPer1M / 1M) + (outputTokens × outputPer1M / 1M) + * userCharge = awsCost × 2 + * units = ceil(userCharge / 0.05) // $0.05 per unit, minimum 1 + * + * Imports costPer1MTokens from modelRegistry.ts (the authoritative pricing source). + */ + +// ─── Types ─────────────────────────────────────────────────────────────────── + +export interface TokenUsage { + inputTokens: number; + outputTokens: number; + totalTokens: number; +} + +export interface TokenCostBreakdown { + awsCostUsd: number; + userChargeUsd: number; + units: number; +} + +// ─── Constants ─────────────────────────────────────────────────────────────── + +/** Price per billing unit charged to the user */ +const UNIT_PRICE_USD = 0.05; + +/** Markup multiplier over AWS cost */ +const MARKUP_MULTIPLIER = 2; + +/** + * Approximate tokens-per-character ratio for estimation. + * English text averages ~4 characters per token across most models. + * We round UP to be conservative (charge slightly more than actual). + */ +const CHARS_PER_TOKEN = 4; + +// ─── Token Extraction ──────────────────────────────────────────────────────── + +/** + * Extract token usage from a Bedrock response body, normalizing across + * all provider-specific formats. + * + * For InvokeModelCommand: pass the parsed JSON from response.body. + * For ConverseCommand: pass response.usage directly (standardized by AWS SDK). + * + * Returns zeros if extraction fails. When that happens, callers should + * use estimateTokenUsage(inputText, outputText) for a character-based + * estimate. This ensures we ALWAYS meter usage, even for providers that + * don't return token counts. + */ +export function extractTokenUsage( + responseBody: any, + modelId: string, +): TokenUsage { + if ( !responseBody ) { + return { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; + } + + // ConverseCommand format (AWS SDK standardized): + // response.usage = { inputTokens: N, outputTokens: N } + if ( + responseBody.inputTokens !== undefined && + responseBody.outputTokens !== undefined + ) { + const input = responseBody.inputTokens || 0; + const output = responseBody.outputTokens || 0; + return { inputTokens: input, outputTokens: output, totalTokens: input + output }; + } + + // Anthropic Claude / DeepSeek / Mistral / Moonshot (Kimi): + // usage.input_tokens, usage.output_tokens + if ( responseBody.usage?.input_tokens !== undefined ) { + const input = responseBody.usage.input_tokens || 0; + const output = responseBody.usage.output_tokens || 0; + return { inputTokens: input, outputTokens: output, totalTokens: input + output }; + } + + // Meta / Llama: + // prompt_token_count, generation_token_count + if ( responseBody.prompt_token_count !== undefined ) { + const input = responseBody.prompt_token_count || 0; + const output = responseBody.generation_token_count || 0; + return { inputTokens: input, outputTokens: output, totalTokens: input + output }; + } + + // Amazon Titan: + // inputTokenCount, outputTokenCount (top-level) + if ( responseBody.inputTokenCount !== undefined ) { + const input = responseBody.inputTokenCount || 0; + const output = responseBody.outputTokenCount || 0; + return { inputTokens: input, outputTokens: output, totalTokens: input + output }; + } + + // Cohere (billed_units variant): + // meta.billed_units.input_tokens, meta.billed_units.output_tokens + if ( responseBody.meta?.billed_units?.input_tokens !== undefined ) { + const input = responseBody.meta.billed_units.input_tokens || 0; + const output = responseBody.meta.billed_units.output_tokens || 0; + return { inputTokens: input, outputTokens: output, totalTokens: input + output }; + } + + // Cohere (legacy variant): + // prompt_tokens, generation_tokens + if ( responseBody.prompt_tokens !== undefined ) { + const input = responseBody.prompt_tokens || 0; + const output = responseBody.generation_tokens || 0; + return { inputTokens: input, outputTokens: output, totalTokens: input + output }; + } + + // AI21: + // usage.prompt_tokens, usage.completion_tokens + if ( responseBody.usage?.prompt_tokens !== undefined ) { + const input = responseBody.usage.prompt_tokens || 0; + const output = responseBody.usage.completion_tokens || 0; + return { inputTokens: input, outputTokens: output, totalTokens: input + output }; + } + + // Could not extract from structured fields — return zeros. + // Callers should use estimateTokenUsage() with the raw text as a fallback. + console.warn( + `[tokenBilling] Could not extract token usage from response for model ${modelId}. ` + + `Use estimateTokenUsage() with input/output text for character-based estimation.`, + ); + return { inputTokens: 0, outputTokens: 0, totalTokens: 0 }; +} + +/** + * Estimate token usage from raw text when the provider does not return + * structured token counts. Uses ~4 characters per token (conservative). + * + * This ensures we ALWAYS bill something, even for providers that don't + * report token counts in their response. + * + * @param inputText The prompt/input text sent to the model + * @param outputText The response text returned by the model + * @returns TokenUsage with estimated counts (always > 0) + */ +export function estimateTokenUsage( + inputText: string, + outputText: string, +): TokenUsage { + const inputTokens = Math.max( 1, Math.ceil( inputText.length / CHARS_PER_TOKEN ) ); + const outputTokens = Math.max( 1, Math.ceil( outputText.length / CHARS_PER_TOKEN ) ); + return { + inputTokens, + outputTokens, + totalTokens: inputTokens + outputTokens, + }; +} + +// ─── Unit Calculation ──────────────────────────────────────────────────────── + +/** + * Calculate billing units from actual token counts. + * + * Reads costPer1MTokens from modelRegistry.ts BEDROCK_MODELS. + * Falls back to Haiku 4.5 pricing ($1/$5) for unknown models. + */ +export function calculateUnitsFromTokens( + modelId: string, + inputTokens: number, + outputTokens: number, +): number { + // Dynamic import avoided — we import the lookup at call time + // to keep this module side-effect-free during testing. + // Callers should pass pricing if needed, but we inline the lookup here. + const pricing = getModelPricing( modelId ); + + const awsCost = + ( inputTokens * pricing.input / 1_000_000 ) + + ( outputTokens * pricing.output / 1_000_000 ); + + const userCharge = awsCost * MARKUP_MULTIPLIER; + const units = Math.ceil( userCharge / UNIT_PRICE_USD ); + + return Math.max( 1, units ); // Minimum 1 unit per call +} + +/** + * Full cost breakdown for analytics/display. + */ +export function calculateTokenCostBreakdown( + modelId: string, + inputTokens: number, + outputTokens: number, +): TokenCostBreakdown { + const pricing = getModelPricing( modelId ); + + const awsCostUsd = + ( inputTokens * pricing.input / 1_000_000 ) + + ( outputTokens * pricing.output / 1_000_000 ); + + const userChargeUsd = awsCostUsd * MARKUP_MULTIPLIER; + const units = Math.max( 1, Math.ceil( userChargeUsd / UNIT_PRICE_USD ) ); + + return { awsCostUsd, userChargeUsd, units }; +} + +// ─── Internal Helpers ──────────────────────────────────────────────────────── + +/** + * Look up per-1M-token pricing from the model registry. + * Falls back to Haiku 4.5 pricing if model is unknown. + */ +function getModelPricing( modelId: string ): { input: number; output: number } { + // Lazy require to avoid circular dependency at module load time. + // modelRegistry.ts is a pure data file so this is safe. + try { + // eslint-disable-next-line @typescript-eslint/no-var-requires + const { BEDROCK_MODELS } = require( "../modelRegistry" ); + const model = BEDROCK_MODELS[modelId]; + if ( model?.costPer1MTokens ) { + return model.costPer1MTokens; + } + } catch { + // Module not available (e.g., in unit tests) — use fallback + } + + // Default: Haiku 4.5 pricing ($1 input / $5 output per 1M) + return { input: 1.0, output: 5.0 }; +} diff --git a/convex/lib/unifiedModalitySwitching.ts b/convex/lib/unifiedModalitySwitching.ts new file mode 100644 index 0000000..ca29f6c --- /dev/null +++ b/convex/lib/unifiedModalitySwitching.ts @@ -0,0 +1,515 @@ +/** + * Unified Modality Switching + * + * Dynamically selects optimal models for ALL modalities: + * - Text (Claude: Haiku → Sonnet → Opus) + * - Image (Titan → Nova Canvas → Stable Diffusion XL) + * - Video (Nova Reel variants) + * - Speech (Polly Standard → Neural) + * + * Single location for all model routing decisions. + */ + +import type { Doc } from "../_generated/dataModel"; + +type AgentDoc = Doc<"agents">; + +/** + * Supported modalities + */ +export type Modality = "text" | "image" | "video" | "speech" | "multimodal"; + +/** + * Model tier for any modality + */ +export interface UnifiedModelTier { + modality: Modality; + name: string; + modelId: string; + costPer1KTokensOrUnit: number; // USD + speedRating: 1 | 2 | 3; // 1 = fast, 3 = slow + qualityRating: 1 | 2 | 3; // 1 = basic, 3 = premium + maxDuration?: number; // For video (seconds) + maxDimensions?: { width: number; height: number }; +} + +/** + * All available models across modalities + */ +export const UNIFIED_MODEL_CATALOG: Record> = { + // TEXT MODELS + text: { + haiku: { + modality: "text", + name: "Claude 4.5 Haiku", + modelId: "anthropic.claude-haiku-4-5-20251001-v1:0", + costPer1KTokensOrUnit: 0.006, // $1/1M input + $5/1M output averaged + speedRating: 1, + qualityRating: 1, + }, + sonnet: { + modality: "text", + name: "Claude 4.5 Sonnet", + modelId: "anthropic.claude-sonnet-4-5-20250929-v1:0", + costPer1KTokensOrUnit: 0.018, // $3/1M input + $15/1M output averaged + speedRating: 2, + qualityRating: 3, + }, + }, + + // IMAGE MODELS + image: { + titan: { + modality: "image", + name: "Amazon Titan Image Generator", + modelId: "amazon.titan-image-generator-v1", + costPer1KTokensOrUnit: 0.008, // $0.008 per image (512x512) + speedRating: 1, + qualityRating: 1, + maxDimensions: { width: 512, height: 512 }, + }, + novaCanvas: { + modality: "image", + name: "Amazon Nova Canvas", + modelId: "amazon.nova-canvas-v1:0", + costPer1KTokensOrUnit: 0.040, // $0.040 per image (1024x1024) + speedRating: 2, + qualityRating: 2, + maxDimensions: { width: 1024, height: 1024 }, + }, + sdxl: { + modality: "image", + name: "Stable Diffusion XL", + modelId: "stability.stable-diffusion-xl-v1", + costPer1KTokensOrUnit: 0.018, // $0.018 per image (1024x1024) + speedRating: 2, + qualityRating: 3, + maxDimensions: { width: 1024, height: 1024 }, + }, + }, + + // VIDEO MODELS + video: { + novaReelStandard: { + modality: "video", + name: "Amazon Nova Reel (Standard)", + modelId: "amazon.nova-reel-v1:0", + costPer1KTokensOrUnit: 0.063, // $0.063 per second + speedRating: 2, + qualityRating: 2, + maxDuration: 6, + }, + novaReelPremium: { + modality: "video", + name: "Amazon Nova Reel (Premium)", + modelId: "amazon.nova-reel-v1:0", // Same model, different config + costPer1KTokensOrUnit: 0.095, // $0.095 per second (higher quality settings) + speedRating: 3, + qualityRating: 3, + maxDuration: 6, + }, + }, + + // SPEECH MODELS + speech: { + pollyStandard: { + modality: "speech", + name: "Amazon Polly (Standard)", + modelId: "polly-standard", + costPer1KTokensOrUnit: 0.004, // $4.00 per 1M characters + speedRating: 1, + qualityRating: 1, + }, + pollyNeural: { + modality: "speech", + name: "Amazon Polly (Neural)", + modelId: "polly-neural", + costPer1KTokensOrUnit: 0.016, // $16.00 per 1M characters + speedRating: 2, + qualityRating: 3, + }, + }, +}; + +/** + * Detect modality from user message + */ +export function detectModality( message: string ): Modality { + const lower = message.toLowerCase(); + + // Check for multiple modalities + const hasVideo = /video|clip|animation|footage|reel|explainer/i.test( lower ); + const hasImage = /image|picture|photo|illustration|graphic|banner|thumbnail/i.test( lower ); + const hasSpeech = /voice|speech|audio|narration|voiceover|tts|text.to.speech/i.test( lower ); + const hasText = true; // Always has text (it's a text message) + + const modalityCount = [hasVideo, hasImage, hasSpeech, hasText].filter( Boolean ).length; + + if ( modalityCount > 2 || ( hasVideo && hasImage ) ) { + return "multimodal"; + } + + if ( hasVideo ) return "video"; + if ( hasImage ) return "image"; + if ( hasSpeech ) return "speech"; + return "text"; +} + +/** + * Complexity signals for ANY modality + */ +export interface UnifiedComplexitySignals { + // Message characteristics + messageLength: number; + hasCodeBlocks: boolean; + hasMultipleQuestions: boolean; + hasTechnicalTerms: boolean; + + // Modality-specific signals + modalitySignals: { + // Image + requestsHighResolution?: boolean; + requestsMultipleVariations?: boolean; + requestsComplexStyle?: boolean; + requestsPhotoRealism?: boolean; + + // Video + requestsLongDuration?: boolean; + requestsMultipleScenes?: boolean; + requestsAnimation?: boolean; + requestsProfessionalQuality?: boolean; + + // Speech + requestsMultipleVoices?: boolean; + requestsEmotionalTone?: boolean; + requestsLongForm?: boolean; + + // Text + requestsDeepReasoning?: boolean; + requestsStepByStep?: boolean; + }; + + // Context + conversationLength: number; + previousFailures: number; + userExplicitQualityRequest: boolean; +} + +/** + * Analyze complexity for ANY modality + */ +export function analyzeUnifiedComplexity( + message: string, + modality: Modality, + conversationHistory: Array<{ role: string; content: string }> = [] +): UnifiedComplexitySignals { + const lower = message.toLowerCase(); + + // Base signals + const messageLength = message.length; + const hasCodeBlocks = /```|`/.test( message ); + const hasMultipleQuestions = ( message.match( /\?/g ) || [] ).length > 1; + const hasTechnicalTerms = /algorithm|architecture|implementation|framework|api/i.test( lower ); + + // Context + const conversationLength = conversationHistory.length; + const previousFailures = conversationHistory.filter( ( m ) => + m.role === "assistant" && /error|failed|couldn't/i.test( m.content ) + ).length; + const userExplicitQualityRequest = /high.quality|premium|professional|best/i.test( lower ); + + // Modality-specific signals + const modalitySignals: UnifiedComplexitySignals["modalitySignals"] = {}; + + switch ( modality ) { + case "image": + modalitySignals.requestsHighResolution = /high.res|hd|4k|large|1920|2048/i.test( lower ); + modalitySignals.requestsMultipleVariations = /variation|multiple|different|several/i.test( + lower + ); + modalitySignals.requestsComplexStyle = /artistic|detailed|intricate|complex|realistic/i.test( + lower + ); + modalitySignals.requestsPhotoRealism = /photorealistic|photo.realistic|like.a.photo/i.test( + lower + ); + break; + + case "video": + modalitySignals.requestsLongDuration = /\d+.second|long|extended|full/i.test( lower ); + modalitySignals.requestsMultipleScenes = /scene|transition|sequence|story/i.test( lower ); + modalitySignals.requestsAnimation = /animate|animation|motion|movement/i.test( lower ); + modalitySignals.requestsProfessionalQuality = /professional|cinematic|polished/i.test( lower ); + break; + + case "speech": + modalitySignals.requestsMultipleVoices = /voices|characters|dialogue/i.test( lower ); + modalitySignals.requestsEmotionalTone = /emotion|expressive|natural|human/i.test( lower ); + modalitySignals.requestsLongForm = /podcast|audiobook|narration|long/i.test( lower ); + break; + + case "text": + modalitySignals.requestsDeepReasoning = /analyze|explain|prove|reason|why/i.test( lower ); + modalitySignals.requestsStepByStep = /step.by.step|break.down|detail/i.test( lower ); + break; + } + + return { + messageLength, + hasCodeBlocks, + hasMultipleQuestions, + hasTechnicalTerms, + modalitySignals, + conversationLength, + previousFailures, + userExplicitQualityRequest, + }; +} + +/** + * Calculate complexity score (0-100) for ANY modality + */ +export function calculateUnifiedComplexityScore( + signals: UnifiedComplexitySignals, + modality: Modality +): number { + let score = 0; + + // Base complexity (0-40 points) + if ( signals.messageLength > 500 ) score += 20; + else if ( signals.messageLength > 200 ) score += 10; + else score += 5; + + if ( signals.hasCodeBlocks ) score += 5; + if ( signals.hasMultipleQuestions ) score += 5; + if ( signals.hasTechnicalTerms ) score += 10; + + // Context (0-20 points) + if ( signals.conversationLength > 10 ) score += 10; + else if ( signals.conversationLength > 5 ) score += 5; + + if ( signals.previousFailures > 0 ) score += 10; + + // Modality-specific complexity (0-40 points) + const ms = signals.modalitySignals; + + switch ( modality ) { + case "image": + if ( ms.requestsHighResolution ) score += 10; + if ( ms.requestsMultipleVariations ) score += 10; + if ( ms.requestsComplexStyle ) score += 10; + if ( ms.requestsPhotoRealism ) score += 10; + break; + + case "video": + if ( ms.requestsLongDuration ) score += 10; + if ( ms.requestsMultipleScenes ) score += 15; + if ( ms.requestsAnimation ) score += 5; + if ( ms.requestsProfessionalQuality ) score += 10; + break; + + case "speech": + if ( ms.requestsMultipleVoices ) score += 15; + if ( ms.requestsEmotionalTone ) score += 10; + if ( ms.requestsLongForm ) score += 15; + break; + + case "text": + if ( ms.requestsDeepReasoning ) score += 20; + if ( ms.requestsStepByStep ) score += 20; + break; + } + + // User explicit quality request (bonus 20 points) + if ( signals.userExplicitQualityRequest ) score += 20; + + return Math.min( score, 100 ); +} + +/** + * Select optimal model for ANY modality + */ +export function selectUnifiedModel( + modality: Modality, + complexityScore: number, + options: { + preferCost?: boolean; + preferSpeed?: boolean; + preferQuality?: boolean; + userTier?: "freemium" | "personal" | "enterprise"; + } = {} +): UnifiedModelTier { + const { preferCost = false, preferSpeed = false, preferQuality = false, userTier = "freemium" } = + options; + + // Fallback "multimodal" to "text" since UNIFIED_MODEL_CATALOG has no multimodal key + const effectiveModality = modality === "multimodal" ? "text" : modality; + const models = UNIFIED_MODEL_CATALOG[effectiveModality]; + if ( !models ) { + throw new Error( `No models available for modality: ${modality}` ); + } + + const availableModels = Object.values( models ); + + // Freemium: always cheapest + if ( userTier === "freemium" ) { + return availableModels.sort( + ( a, b ) => a.costPer1KTokensOrUnit - b.costPer1KTokensOrUnit + )[0]; + } + + // Complexity-based routing + if ( complexityScore < 30 ) { + // Low complexity → Cheapest/Fastest + return availableModels.sort( ( a, b ) => { + if ( preferSpeed ) return a.speedRating - b.speedRating; + return a.costPer1KTokensOrUnit - b.costPer1KTokensOrUnit; + } )[0]; + } else if ( complexityScore < 60 ) { + // Medium complexity → Balanced + return availableModels.sort( ( a, b ) => { + if ( preferQuality ) return b.qualityRating - a.qualityRating; + if ( preferCost ) return a.costPer1KTokensOrUnit - b.costPer1KTokensOrUnit; + return a.speedRating - b.speedRating; + } )[Math.min( 1, availableModels.length - 1 )]; + } else { + // High complexity → Best quality + return availableModels.sort( ( a, b ) => b.qualityRating - a.qualityRating )[0]; + } +} + +/** + * Unified model switching decision + */ +export interface UnifiedModelDecision { + modality: Modality; + selectedModel: UnifiedModelTier; + complexityScore: number; + reasoning: string; + estimatedCost: number; + signals: UnifiedComplexitySignals; +} + +/** + * Make unified model switching decision for ANY modality + */ +export function decideUnifiedModelSwitch( + message: string, + conversationHistory: Array<{ role: string; content: string }>, + agent: AgentDoc, + options: { + preferCost?: boolean; + preferSpeed?: boolean; + preferQuality?: boolean; + userTier?: "freemium" | "personal" | "enterprise"; + explicitModality?: Modality; // Override auto-detection + } = {} +): UnifiedModelDecision { + // Detect modality + const modality = options.explicitModality || detectModality( message ); + + // Analyze complexity + const signals = analyzeUnifiedComplexity( message, modality, conversationHistory ); + const complexityScore = calculateUnifiedComplexityScore( signals, modality ); + + // Select model + const selectedModel = selectUnifiedModel( modality, complexityScore, options ); + + // Calculate estimated cost + let estimatedCost = 0; + switch ( modality ) { + case "text": + estimatedCost = ( 1000 / 1000 ) * selectedModel.costPer1KTokensOrUnit; // 1K tokens avg + break; + case "image": + estimatedCost = selectedModel.costPer1KTokensOrUnit; // Per image + break; + case "video": + estimatedCost = 6 * selectedModel.costPer1KTokensOrUnit; // 6 seconds + break; + case "speech": + estimatedCost = ( 500 / 1000 ) * selectedModel.costPer1KTokensOrUnit; // 500 chars avg + break; + } + + // Generate reasoning + let reasoning = `Modality: ${modality}. Complexity: ${complexityScore}/100. `; + + if ( complexityScore < 30 ) { + reasoning += `Low complexity - using ${selectedModel.name} (fast, cost-effective).`; + } else if ( complexityScore < 60 ) { + reasoning += `Moderate complexity - using ${selectedModel.name} (balanced).`; + } else { + reasoning += `High complexity - using ${selectedModel.name} (premium quality).`; + } + + if ( signals.userExplicitQualityRequest ) { + reasoning += " User requested high quality."; + } + + if ( signals.previousFailures > 0 ) { + reasoning += " Escalating due to previous failures."; + } + + return { + modality, + selectedModel, + complexityScore, + reasoning, + estimatedCost, + signals, + }; +} + +/** + * Get model configuration for execution + */ +export function getModelExecutionConfig( decision: UnifiedModelDecision ): any { + const { modality, selectedModel } = decision; + + switch ( modality ) { + case "text": + return { + modelId: selectedModel.modelId, + maxTokens: 4096, + temperature: 1, + thinking: { + type: "enabled", + budget_tokens: 3000, + }, + }; + + case "image": + return { + modelId: selectedModel.modelId, + taskType: "TEXT_IMAGE", + imageGenerationConfig: { + numberOfImages: 1, + quality: selectedModel.qualityRating === 3 ? "premium" : "standard", + height: selectedModel.maxDimensions?.height || 1024, + width: selectedModel.maxDimensions?.width || 1024, + }, + }; + + case "video": + return { + modelId: selectedModel.modelId, + taskType: "TEXT_VIDEO", + videoGenerationConfig: { + durationSeconds: selectedModel.maxDuration || 6, + fps: selectedModel.qualityRating === 3 ? 60 : 24, + dimension: selectedModel.qualityRating === 3 ? "1920x1080" : "1280x720", + }, + }; + + case "speech": + return { + engine: selectedModel.modelId.includes( "neural" ) ? "neural" : "standard", + voiceId: "Joanna", // Can be configured + languageCode: "en-US", + }; + + default: + throw new Error( `Unknown modality: ${modality}` ); + } +} diff --git a/convex/localModelDetector.ts b/convex/localModelDetector.ts new file mode 100644 index 0000000..143bc93 --- /dev/null +++ b/convex/localModelDetector.ts @@ -0,0 +1,579 @@ +"use node"; + +/** + * Local Model Detection and Management System + * + * Detects and manages local AI models: + * - Ollama (primary) + * - LlamaCpp + * - LMStudio + * - GGUF models + * - Custom implementations + */ + +import { action, internalAction } from "./_generated/server"; +import { v } from "convex/values"; +import { api, internal } from "./_generated/api"; +import { execSync, spawn } from "child_process"; +import * as fs from "fs"; +import * as path from "path"; +import * as os from "os"; + +interface LocalModelInfo { + provider: 'ollama' | 'llamacpp' | 'lmstudio' | 'gguf' | 'custom'; + name: string; + version: string; + status: 'running' | 'stopped' | 'not_found'; + endpoint?: string; + models?: string[]; + capabilities?: string[]; +} + +interface ModelDetectionResult { + detected: LocalModelInfo[]; + recommendations: string[]; + setupRequired: boolean; +} + +/** + * Detect all available local AI model providers + */ +export const detectLocalModels = internalAction({ + args: {}, + handler: async (ctx): Promise => { + const detected: LocalModelInfo[] = []; + const recommendations: string[] = []; + + // 1. Check Ollama + const ollamaResult = await detectOllama(); + if (ollamaResult.status !== 'not_found') { + detected.push(ollamaResult); + } else { + recommendations.push("Ollama not detected. Install Ollama for local model support."); + } + + // 2. Check LlamaCpp + const llamaCppResult = await detectLlamaCpp(); + if (llamaCppResult.status !== 'not_found') { + detected.push(llamaCppResult); + } + + // 3. Check LMStudio + const lmStudioResult = await detectLMStudio(); + if (lmStudioResult.status !== 'not_found') { + detected.push(lmStudioResult); + } + + // 4. Check for GGUF models + const ggufResult = await detectGGUF(); + if (ggufResult.status !== 'not_found') { + detected.push(ggufResult); + } + + return { + detected, + recommendations, + setupRequired: detected.length === 0 + }; + }, +}); + +/** + * Detect Ollama installation and running models + */ +async function detectOllama(): Promise { + try { + // Check if Ollama is running + const response = await fetch('http://localhost:11434/api/tags', { + method: 'GET', + signal: AbortSignal.timeout(5000) + }); + + if (response.ok) { + const data = await response.json(); + const models = data.models?.map((m: any) => m.name) || []; + + return { + provider: 'ollama', + name: 'Ollama', + version: 'detected', + status: 'running', + endpoint: 'http://localhost:11434', + models, + capabilities: ['text-generation', 'embeddings', 'vision'] + }; + } + } catch (error) { + // Try to check if Ollama binary exists + try { + execSync('ollama --version', { timeout: 5000 }); + return { + provider: 'ollama', + name: 'Ollama', + version: 'installed', + status: 'stopped', + capabilities: ['text-generation', 'embeddings', 'vision'] + }; + } catch { + // Ollama not found + } + } + + return { + provider: 'ollama', + name: 'Ollama', + version: 'not_found', + status: 'not_found' + }; +} + +/** + * Detect LlamaCpp installation + */ +async function detectLlamaCpp(): Promise { + try { + const { execSync } = require('child_process'); + + // Check for llama.cpp binary + const output = execSync('llama-cli --version 2>/dev/null || llama.cpp --version 2>/dev/null || echo "not_found"', { + encoding: 'utf8', + timeout: 5000 + }); + + if (!output.includes('not_found')) { + return { + provider: 'llamacpp', + name: 'LlamaCpp', + version: output.trim(), + status: 'stopped', + capabilities: ['text-generation', 'chat'] + }; + } + } catch (error) { + // LlamaCpp not found + } + + return { + provider: 'llamacpp', + name: 'LlamaCpp', + version: 'not_found', + status: 'not_found' + }; +} + +/** + * Detect LMStudio installation + */ +async function detectLMStudio(): Promise { + try { + const { execSync } = require('child_process'); + + // Check for LMStudio + const output = execSync('lmstudio --version 2>/dev/null || echo "not_found"', { + encoding: 'utf8', + timeout: 5000 + }); + + if (!output.includes('not_found')) { + return { + provider: 'lmstudio', + name: 'LMStudio', + version: output.trim(), + status: 'stopped', + capabilities: ['text-generation', 'chat', 'embeddings'] + }; + } + } catch (error) { + // LMStudio not found + } + + return { + provider: 'lmstudio', + name: 'LMStudio', + version: 'not_found', + status: 'not_found' + }; +} + +/** + * Detect GGUF model files + */ +async function detectGGUF(): Promise { + try { + const fs = require('fs'); + const path = require('path'); + + // Common directories to check for GGUF files + const searchDirs = [ + process.env.HOME || process.env.USERPROFILE, + '/opt/models', + '/usr/local/models', + './models' + ]; + + const foundModels: string[] = []; + + for (const dir of searchDirs) { + if (fs.existsSync(dir)) { + const files = fs.readdirSync(dir, { recursive: true }); + const ggufFiles = files.filter((file: string) => + typeof file === 'string' && file.toLowerCase().endsWith('.gguf') + ); + foundModels.push(...ggufFiles.map((file: string) => path.basename(file))); + } + } + + if (foundModels.length > 0) { + return { + provider: 'gguf', + name: 'GGUF Models', + version: 'detected', + status: 'stopped', + models: foundModels, + capabilities: ['text-generation', 'chat'] + }; + } + } catch (error) { + // GGUF detection failed + } + + return { + provider: 'gguf', + name: 'GGUF Models', + version: 'not_found', + status: 'not_found' + }; +} + +/** + * Install and setup Ollama automatically + */ +export const setupOllama = internalAction({ + args: { + platform: v.string(), // 'windows', 'macos', 'linux' + installModels: v.optional(v.array(v.string())), + }, + handler: async (ctx, args): Promise<{ success: boolean; message: string; endpoint?: string }> => { + try { + const { execSync } = require('child_process'); + const os = require('os'); + const platform = args.platform || os.platform(); + + let installCommand = ''; + + switch (platform) { + case 'win32': + // Windows installation + installCommand = ` + curl -fsSL https://ollama.ai/install.sh | sh + ollama serve & + `; + break; + + case 'darwin': + // macOS installation + installCommand = ` + brew install ollama + brew services start ollama + `; + break; + + case 'linux': + // Linux installation + installCommand = ` + curl -fsSL https://ollama.ai/install.sh | sh + systemctl enable ollama + systemctl start ollama + `; + break; + + default: + return { + success: false, + message: `Unsupported platform: ${platform}` + }; + } + + // Execute installation + execSync(installCommand, { + stdio: 'inherit', + timeout: 300000 // 5 minutes timeout + }); + + // Wait for Ollama to start + await new Promise(resolve => setTimeout(resolve, 5000)); + + // Install requested models + if (args.installModels && args.installModels.length > 0) { + for (const model of args.installModels) { + try { + execSync(`ollama pull ${model}`, { + stdio: 'inherit', + timeout: 600000 // 10 minutes per model + }); + } catch (error) { + console.warn(`Failed to install model ${model}:`, error); + } + } + } + + return { + success: true, + message: 'Ollama installed and configured successfully', + endpoint: 'http://localhost:11434' + }; + + } catch (error: any) { + return { + success: false, + message: `Ollama setup failed: ${error.message}` + }; + } + }, +}); + +/** + * Start local model provider + */ +export const startLocalModel = internalAction({ + args: { + provider: v.union(v.literal('ollama'), v.literal('llamacpp'), v.literal('lmstudio')), + modelName: v.optional(v.string()), + }, + handler: async (ctx, args): Promise<{ success: boolean; message: string; endpoint?: string }> => { + try { + const { execSync, spawn } = require('child_process'); + + switch (args.provider) { + case 'ollama': + // Start Ollama server + const ollamaProcess = spawn('ollama', ['serve'], { + detached: true, + stdio: 'ignore' + }); + ollamaProcess.unref(); + + // Wait for server to start + await new Promise(resolve => setTimeout(resolve, 3000)); + + return { + success: true, + message: 'Ollama server started', + endpoint: 'http://localhost:11434' + }; + + case 'llamacpp': + if (!args.modelName) { + return { + success: false, + message: 'Model name required for LlamaCpp' + }; + } + + // Start LlamaCpp server + const llamaProcess = spawn('llama-server', ['-m', args.modelName, '--host', '127.0.0.1', '--port', '8080'], { + detached: true, + stdio: 'ignore' + }); + llamaProcess.unref(); + + return { + success: true, + message: 'LlamaCpp server started', + endpoint: 'http://localhost:8080' + }; + + case 'lmstudio': + // Start LMStudio local server + const lmProcess = spawn('lmstudio', ['--local-server'], { + detached: true, + stdio: 'ignore' + }); + lmProcess.unref(); + + return { + success: true, + message: 'LMStudio local server started', + endpoint: 'http://localhost:1234' + }; + + default: + return { + success: false, + message: `Unsupported provider: ${args.provider}` + }; + } + + } catch (error: any) { + return { + success: false, + message: `Failed to start ${args.provider}: ${error.message}` + }; + } + }, +}); + +/** + * Get recommended models for different use cases + */ +export const getRecommendedModels = internalAction({ + args: { + useCase: v.union( + v.literal('chat'), + v.literal('coding'), + v.literal('analysis'), + v.literal('creative'), + v.literal('research') + ), + performance: v.optional(v.union(v.literal('fast'), v.literal('balanced'), v.literal('quality'))), + }, + handler: async (ctx, args): Promise<{ ollama: string[]; llamacpp: string[]; lmstudio: string[] }> => { + const recommendations = { + chat: { + ollama: ['llama3.2:3b', 'mistral:7b', 'phi3:3.8b'], + llamacpp: ['llama-2-7b-chat.Q4_K_M.gguf', 'mistral-7b-instruct-v0.2.Q4_K_M.gguf'], + lmstudio: ['microsoft/DialoGPT-medium', 'microsoft/DialoGPT-large'] + }, + coding: { + ollama: ['codellama:7b', 'deepseek-coder:6.7b', 'starcoder2:3b'], + llamacpp: ['codellama-7b.Q4_K_M.gguf', 'deepseek-coder-6.7b.Q4_K_M.gguf'], + lmstudio: ['bigcode/starcoder', 'WizardLM/WizardCoder-15B-V1.0'] + }, + analysis: { + ollama: ['llama3.1:8b', 'mixtral:8x7b', 'qwen2.5:7b'], + llamacpp: ['llama-3.1-8b-instruct.Q4_K_M.gguf', 'qwen2.5-7b-instruct.Q4_K_M.gguf'], + lmstudio: ['microsoft/wizardlm-2-8x22b', '01-ai/Yi-34B-Chat'] + }, + creative: { + ollama: ['llama3.1:8b', 'mistral:7b', 'zephyr:7b'], + llamacpp: ['llama-3.1-8b-instruct.Q4_K_M.gguf', 'zephyr-7b-beta.Q4_K_M.gguf'], + lmstudio: ['mosaicml/mpt-7b-chat', 'lmsysorg/bakllava-1'] + }, + research: { + ollama: ['llama3.1:70b', 'mixtral:8x7b', 'qwen2.5:72b'], + llamacpp: ['llama-3.1-70b-instruct.Q4_K_M.gguf', 'qwen2.5-72b-instruct.Q4_K_M.gguf'], + lmstudio: ['microsoft/wizardlm-2-8x22b', 'upstage/SOLAR-10.7B-Instruct-v1.0'] + } + }; + + const useCaseRecs = recommendations[args.useCase]; + + // Adjust based on performance preference + if (args.performance) { + switch (args.performance) { + case 'fast': + // Return smaller/faster models + return { + ollama: useCaseRecs.ollama.filter(m => m.includes('3b') || m.includes('7b')), + llamacpp: useCaseRecs.llamacpp.filter(m => m.includes('3b') || m.includes('7b')), + lmstudio: useCaseRecs.lmstudio.slice(0, 2) + }; + case 'quality': + // Return larger/better models + return { + ollama: useCaseRecs.ollama.filter(m => m.includes('70b') || m.includes('72b') || m.includes('8x7b')), + llamacpp: useCaseRecs.llamacpp.filter(m => m.includes('70b') || m.includes('72b')), + lmstudio: useCaseRecs.lmstudio + }; + default: // balanced + return useCaseRecs; + } + } + + return useCaseRecs; + }, +}); + +/** + * Test local model connectivity + */ +export const testLocalModel = internalAction({ + args: { + provider: v.union(v.literal('ollama'), v.literal('llamacpp'), v.literal('lmstudio')), + endpoint: v.string(), + modelName: v.optional(v.string()), + }, + handler: async (ctx, args): Promise<{ success: boolean; message: string; latency?: number }> => { + const startTime = Date.now(); + + // Validate endpoint to prevent SSRF - only allow localhost connections + const allowedHosts = ["localhost", "127.0.0.1", "0.0.0.0", "::1"]; + try { + const endpointUrl = new URL(args.endpoint); + if (!allowedHosts.includes(endpointUrl.hostname)) { + return { + success: false, + message: `Endpoint host '${endpointUrl.hostname}' is not allowed. Only localhost connections are permitted.`, + }; + } + } catch { + return { success: false, message: `Invalid endpoint URL: ${args.endpoint}` }; + } + + try { + switch (args.provider) { + case 'ollama': + const ollamaResponse = await fetch(`${args.endpoint}/api/generate`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model: args.modelName || 'llama3.2:3b', + prompt: 'Hello', + stream: false + }), + signal: AbortSignal.timeout(10000) + }); + + if (ollamaResponse.ok) { + return { + success: true, + message: 'Ollama connection successful', + latency: Date.now() - startTime + }; + } + break; + + case 'llamacpp': + const llamaResponse = await fetch(`${args.endpoint}/health`, { + signal: AbortSignal.timeout(5000) + }); + + if (llamaResponse.ok) { + return { + success: true, + message: 'LlamaCpp connection successful', + latency: Date.now() - startTime + }; + } + break; + + case 'lmstudio': + const lmResponse = await fetch(`${args.endpoint}/v1/models`, { + signal: AbortSignal.timeout(5000) + }); + + if (lmResponse.ok) { + return { + success: true, + message: 'LMStudio connection successful', + latency: Date.now() - startTime + }; + } + break; + } + + return { + success: false, + message: `${args.provider} connection failed`, + latency: Date.now() - startTime + }; + + } catch (error: any) { + return { + success: false, + message: `${args.provider} test failed: ${error.message}`, + latency: Date.now() - startTime + }; + } + }, +}); \ No newline at end of file diff --git a/convex/mcpClient.ts b/convex/mcpClient.ts index eb1e1cb..8694fac 100644 --- a/convex/mcpClient.ts +++ b/convex/mcpClient.ts @@ -131,6 +131,21 @@ export const invokeMCPToolInternal = internalAction({ const executionTime = Date.now() - startTime; + // Meter Bedrock usage if this was a direct Bedrock invocation with token data + if ( + result.success && + args.userId && + result.result?.tokenUsage && + ( result.result.tokenUsage.inputTokens > 0 || result.result.tokenUsage.outputTokens > 0 ) + ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: args.userId, + modelId: result.result.model_id, + inputTokens: result.result.tokenUsage.inputTokens, + outputTokens: result.result.tokenUsage.outputTokens, + } ); + } + // Return properly typed result if (result.success) { return { @@ -417,9 +432,7 @@ async function invokeMCPToolDirect( version: "1.0.0", }, { - capabilities: { - tools: {}, - }, + capabilities: {}, } ); @@ -547,15 +560,20 @@ async function invokeBedrockDirect(parameters: any, timeout: number): Promise m.valueScore !== Infinity && m.overallAbility >= minAbility) + .filter(m => m.valueScore > 0 && m.overallAbility >= minAbility) .sort((a, b) => b.valueScore - a.valueScore) .slice(0, limit); } @@ -480,17 +483,39 @@ export function recommendModelForComplexity( ): ModelBenchmarks { // Map complexity to required ability const requiredAbility = complexity * 0.8; // 80% of complexity score - + // Map complexity to max cost const maxCost = complexity < 30 ? 0.5 : complexity < 50 ? 2.0 : complexity < 70 ? 5.0 : 15.0; - + const optimal = findOptimalModel(taskType, maxCost, requiredAbility); - + if (optimal) return optimal; - + // Fallback: return best available return MODEL_BENCHMARKS[0]; } + +/** + * Convex Query: Get all model benchmarks + * Safe for browser consumption + */ +export const getAllBenchmarks = query({ + args: {}, + handler: async () => { + return MODEL_BENCHMARKS; + }, +}); + +/** + * Convex Query: Get benchmark for specific model + * Safe for browser consumption + */ +export const getBenchmarkForModel = query({ + args: { modelId: v.string() }, + handler: async (_, { modelId }) => { + return MODEL_BENCHMARKS.find(m => m.model === modelId); + }, +}); diff --git a/convex/modelRegistry.ts b/convex/modelRegistry.ts index b28cf3b..2fd87d8 100644 --- a/convex/modelRegistry.ts +++ b/convex/modelRegistry.ts @@ -1,9 +1,9 @@ /** * Model Registry - Complete Bedrock and Ollama Models - * + * * Provides comprehensive model metadata for both AWS Bedrock and Ollama providers * with capability tracking, cost estimates, and recommendations. - * + * * Sources: * - docs/update_features.md * - docs/model_capabilities.md @@ -20,10 +20,10 @@ export type ModelProvider = "bedrock" | "ollama" | "openai" | "anthropic" | "azu /** * Model capabilities */ -export type ModelCapability = - | "text" - | "vision" - | "reasoning" +export type ModelCapability = + | "text" + | "vision" + | "reasoning" | "coding" | "image_generation" | "video_generation" @@ -47,6 +47,8 @@ export interface ModelMetadata { input: number; output: number; }; + /** Weighted billing units consumed per call. 1 unit = 1 Haiku-equivalent call ($0.05 Stripe unit). */ + unitsPerCall?: number; description?: string; type?: "text" | "image" | "video" | "embedding"; } @@ -70,11 +72,12 @@ export const BEDROCK_MODELS: Record = { recommended: true, category: "flagship", costPer1MTokens: { input: 3.0, output: 15.0 }, + unitsPerCall: 3, description: "Latest Claude model with interleaved reasoning, best for complex tasks", }, - "us.anthropic.claude-haiku-4-5-20250514-v1:0": { - id: "us.anthropic.claude-haiku-4-5-20250514-v1:0", + "anthropic.claude-haiku-4-5-20251001-v1:0": { + id: "anthropic.claude-haiku-4-5-20251001-v1:0", name: "Claude 4.5 Haiku", provider: "bedrock", providerDisplay: "Anthropic (Bedrock)", @@ -84,114 +87,25 @@ export const BEDROCK_MODELS: Record = { recommended: true, category: "fast", costPer1MTokens: { input: 1.0, output: 5.0 }, + unitsPerCall: 1, description: "Latest fast Claude model with reasoning, perfect for thinking agents and tool creation", }, // ============================================================================ - // CLAUDE 4.1 SERIES + // CLAUDE 4.6 (PREMIUM) // ============================================================================ - "anthropic.claude-opus-4-1-20250805-v1:0": { - id: "anthropic.claude-opus-4-1-20250805-v1:0", - name: "Claude 4.5 Opus", + "anthropic.claude-opus-4-6-v1:0": { + id: "anthropic.claude-opus-4-6-v1:0", + name: "Claude 4.6 Opus", provider: "bedrock", providerDisplay: "Anthropic (Bedrock)", capabilities: ["text", "vision", "reasoning"], contextWindow: 200000, maxOutput: 16384, category: "premium", - costPer1MTokens: { input: 15.0, output: 75.0 }, - description: "Most capable Claude model for complex reasoning tasks", - }, - - // ============================================================================ - // CLAUDE 4.0 SERIES - // ============================================================================ - "anthropic.claude-opus-4-20250514-v1:0": { - id: "anthropic.claude-opus-4-20250514-v1:0", - name: "Claude 4.0 Opus", - provider: "bedrock", - providerDisplay: "Anthropic (Bedrock)", - capabilities: ["text", "vision", "reasoning"], - contextWindow: 200000, - maxOutput: 16384, - category: "premium", - costPer1MTokens: { input: 15.0, output: 75.0 }, - description: "High-performance Claude model for demanding tasks", - }, - - "anthropic.claude-sonnet-4-20250514-v1:0": { - id: "anthropic.claude-sonnet-4-20250514-v1:0", - name: "Claude 4.0 Sonnet", - provider: "bedrock", - providerDisplay: "Anthropic (Bedrock)", - capabilities: ["text", "vision", "reasoning"], - contextWindow: 200000, - maxOutput: 8192, - category: "balanced", - costPer1MTokens: { input: 3.0, output: 15.0 }, - description: "Balanced Claude model for general use", - }, - - // ============================================================================ - // CLAUDE 3.7 SERIES - // ============================================================================ - "anthropic.claude-3-7-sonnet-20250219-v1:0": { - id: "anthropic.claude-3-7-sonnet-20250219-v1:0", - name: "Claude 3.7 Sonnet", - provider: "bedrock", - providerDisplay: "Anthropic (Bedrock)", - capabilities: ["text", "vision"], - contextWindow: 200000, - maxOutput: 8192, - category: "balanced", - costPer1MTokens: { input: 3.0, output: 15.0 }, - description: "Previous generation Claude Sonnet", - }, - - // ============================================================================ - // CLAUDE 3.5 SERIES - // ============================================================================ - "anthropic.claude-3-5-haiku-20241022-v1:0": { - id: "anthropic.claude-3-5-haiku-20241022-v1:0", - name: "Claude 3.5 Haiku", - provider: "bedrock", - providerDisplay: "Anthropic (Bedrock)", - capabilities: ["text", "vision"], - contextWindow: 200000, - maxOutput: 8192, - recommended: true, - category: "fast", - costPer1MTokens: { input: 1.0, output: 5.0 }, - description: "Fast and cost-effective Claude model", - }, - - "anthropic.claude-3-5-sonnet-20240620-v1:0": { - id: "anthropic.claude-3-5-sonnet-20240620-v1:0", - name: "Claude 3.5 Sonnet", - provider: "bedrock", - providerDisplay: "Anthropic (Bedrock)", - capabilities: ["text", "vision"], - contextWindow: 200000, - maxOutput: 8192, - category: "balanced", - costPer1MTokens: { input: 3.0, output: 15.0 }, - description: "Previous Claude 3.5 generation", - }, - - // ============================================================================ - // CLAUDE 3 SERIES - // ============================================================================ - "anthropic.claude-3-haiku-20240307-v1:0": { - id: "anthropic.claude-3-haiku-20240307-v1:0", - name: "Claude 3 Haiku", - provider: "bedrock", - providerDisplay: "Anthropic (Bedrock)", - capabilities: ["text"], - contextWindow: 200000, - maxOutput: 4096, - category: "fast", - costPer1MTokens: { input: 0.25, output: 1.25 }, - description: "Fastest Claude 3 model", + costPer1MTokens: { input: 5.0, output: 25.0 }, + unitsPerCall: 5, + description: "Most capable Claude model — 5x Haiku cost per call", }, // ============================================================================ @@ -207,6 +121,8 @@ export const BEDROCK_MODELS: Record = { maxOutput: 5000, recommended: true, category: "balanced", + costPer1MTokens: { input: 0.80, output: 3.20 }, + unitsPerCall: 1, description: "Amazon's flagship multimodal model", }, @@ -219,6 +135,8 @@ export const BEDROCK_MODELS: Record = { contextWindow: 300000, maxOutput: 5000, category: "fast", + costPer1MTokens: { input: 0.06, output: 0.24 }, + unitsPerCall: 1, description: "Lightweight Nova model for speed", }, @@ -231,6 +149,8 @@ export const BEDROCK_MODELS: Record = { contextWindow: 128000, maxOutput: 5000, category: "fast", + costPer1MTokens: { input: 0.035, output: 0.14 }, + unitsPerCall: 1, description: "Ultra-fast text-only Nova model", }, @@ -243,6 +163,8 @@ export const BEDROCK_MODELS: Record = { contextWindow: 300000, maxOutput: 5000, category: "premium", + costPer1MTokens: { input: 2.50, output: 10.0 }, + unitsPerCall: 3, description: "Most capable Nova model", }, @@ -297,6 +219,8 @@ export const BEDROCK_MODELS: Record = { contextWindow: 32000, maxOutput: 3000, category: "balanced", + costPer1MTokens: { input: 0.50, output: 1.50 }, + unitsPerCall: 1, description: "Amazon Titan text model for general use", }, @@ -326,6 +250,7 @@ export const BEDROCK_MODELS: Record = { recommended: true, category: "flagship", costPer1MTokens: { input: 0.65, output: 0.65 }, + unitsPerCall: 1, description: "Latest Llama 3.3 model with extended context on Bedrock", }, @@ -339,6 +264,7 @@ export const BEDROCK_MODELS: Record = { maxOutput: 2048, category: "multimodal", costPer1MTokens: { input: 1.2, output: 1.2 }, + unitsPerCall: 1, description: "Llama 3.2 with vision capabilities on Bedrock", }, @@ -352,6 +278,7 @@ export const BEDROCK_MODELS: Record = { maxOutput: 2048, category: "multimodal", costPer1MTokens: { input: 0.35, output: 0.35 }, + unitsPerCall: 1, description: "Compact Llama 3.2 with vision on Bedrock", }, @@ -365,6 +292,7 @@ export const BEDROCK_MODELS: Record = { maxOutput: 2048, category: "lightweight", costPer1MTokens: { input: 0.15, output: 0.15 }, + unitsPerCall: 1, description: "Lightweight Llama 3.2 on Bedrock", }, @@ -378,74 +306,10 @@ export const BEDROCK_MODELS: Record = { maxOutput: 2048, category: "lightweight", costPer1MTokens: { input: 0.1, output: 0.1 }, + unitsPerCall: 1, description: "Ultra-compact Llama 3.2 on Bedrock", }, - "meta.llama3-1-405b-instruct-v1:0": { - id: "meta.llama3-1-405b-instruct-v1:0", - name: "Llama 3.1 405B Instruct", - provider: "bedrock", - providerDisplay: "Meta (Bedrock)", - capabilities: ["text"], - contextWindow: 128000, - maxOutput: 4096, - category: "flagship", - costPer1MTokens: { input: 5.32, output: 16.0 }, - description: "Largest Llama 3.1 model on Bedrock", - }, - - "meta.llama3-1-70b-instruct-v1:0": { - id: "meta.llama3-1-70b-instruct-v1:0", - name: "Llama 3.1 70B Instruct", - provider: "bedrock", - providerDisplay: "Meta (Bedrock)", - capabilities: ["text"], - contextWindow: 128000, - maxOutput: 2048, - category: "balanced", - costPer1MTokens: { input: 0.99, output: 0.99 }, - description: "Balanced Llama 3.1 on Bedrock", - }, - - "meta.llama3-1-8b-instruct-v1:0": { - id: "meta.llama3-1-8b-instruct-v1:0", - name: "Llama 3.1 8B Instruct", - provider: "bedrock", - providerDisplay: "Meta (Bedrock)", - capabilities: ["text"], - contextWindow: 128000, - maxOutput: 2048, - category: "lightweight", - costPer1MTokens: { input: 0.22, output: 0.22 }, - description: "Compact Llama 3.1 on Bedrock", - }, - - "meta.llama3-70b-instruct-v1:0": { - id: "meta.llama3-70b-instruct-v1:0", - name: "Llama 3 70B Instruct", - provider: "bedrock", - providerDisplay: "Meta (Bedrock)", - capabilities: ["text"], - contextWindow: 8192, - maxOutput: 2048, - category: "balanced", - costPer1MTokens: { input: 0.99, output: 0.99 }, - description: "Llama 3 70B on Bedrock", - }, - - "meta.llama3-8b-instruct-v1:0": { - id: "meta.llama3-8b-instruct-v1:0", - name: "Llama 3 8B Instruct", - provider: "bedrock", - providerDisplay: "Meta (Bedrock)", - capabilities: ["text"], - contextWindow: 8192, - maxOutput: 2048, - category: "lightweight", - costPer1MTokens: { input: 0.22, output: 0.22 }, - description: "Llama 3 8B on Bedrock", - }, - // ============================================================================ // MISTRAL AI (BEDROCK) // ============================================================================ @@ -460,6 +324,7 @@ export const BEDROCK_MODELS: Record = { recommended: true, category: "flagship", costPer1MTokens: { input: 3.0, output: 9.0 }, + unitsPerCall: 2, description: "Mistral's most capable model on Bedrock", }, @@ -473,6 +338,7 @@ export const BEDROCK_MODELS: Record = { maxOutput: 8192, category: "lightweight", costPer1MTokens: { input: 1.0, output: 3.0 }, + unitsPerCall: 1, description: "Compact Mistral model on Bedrock", }, @@ -489,6 +355,7 @@ export const BEDROCK_MODELS: Record = { maxOutput: 4096, category: "flagship", costPer1MTokens: { input: 2.0, output: 8.0 }, + unitsPerCall: 2, description: "AI21's flagship model with massive context window", }, @@ -502,6 +369,7 @@ export const BEDROCK_MODELS: Record = { maxOutput: 4096, category: "lightweight", costPer1MTokens: { input: 0.2, output: 0.4 }, + unitsPerCall: 1, description: "Compact Jamba model with large context", }, @@ -518,6 +386,7 @@ export const BEDROCK_MODELS: Record = { maxOutput: 4096, category: "flagship", costPer1MTokens: { input: 3.0, output: 15.0 }, + unitsPerCall: 3, description: "Cohere's most capable model on Bedrock", }, @@ -531,6 +400,7 @@ export const BEDROCK_MODELS: Record = { maxOutput: 4096, category: "balanced", costPer1MTokens: { input: 0.5, output: 1.5 }, + unitsPerCall: 1, description: "Balanced Cohere model on Bedrock", }, @@ -557,6 +427,56 @@ export const BEDROCK_MODELS: Record = { category: "embeddings", description: "Multilingual text embeddings from Cohere", }, + + // ============================================================================ + // DEEPSEEK (BEDROCK) + // ============================================================================ + "deepseek.r1-v1:0": { + id: "deepseek.r1-v1:0", + name: "DeepSeek R1", + provider: "bedrock", + providerDisplay: "DeepSeek (Bedrock)", + capabilities: ["text", "reasoning"], + contextWindow: 64000, + maxOutput: 8192, + recommended: true, + category: "reasoning", + costPer1MTokens: { input: 1.35, output: 5.40 }, + unitsPerCall: 2, + description: "DeepSeek reasoning model with chain-of-thought on Bedrock — heavier output tokens", + }, + + "deepseek.v3-v1:0": { + id: "deepseek.v3-v1:0", + name: "DeepSeek V3.1", + provider: "bedrock", + providerDisplay: "DeepSeek (Bedrock)", + capabilities: ["text", "reasoning", "coding"], + contextWindow: 64000, + maxOutput: 8192, + recommended: true, + category: "flagship", + costPer1MTokens: { input: 0.58, output: 1.68 }, + unitsPerCall: 1, + description: "DeepSeek V3.1 hybrid model — best value reasoning model on Bedrock", + }, + + // ============================================================================ + // MOONSHOT KIMI (BEDROCK) + // ============================================================================ + "moonshot.kimi-k2-thinking": { + id: "moonshot.kimi-k2-thinking", + name: "Kimi K2 Thinking", + provider: "bedrock", + providerDisplay: "Moonshot AI (Bedrock)", + capabilities: ["text", "reasoning"], + contextWindow: 128000, + maxOutput: 8192, + category: "reasoning", + costPer1MTokens: { input: 1.00, output: 4.00 }, + unitsPerCall: 1, + description: "Moonshot Kimi K2 with chain-of-thought reasoning on Bedrock", + }, }; /** @@ -900,101 +820,193 @@ export const ALL_MODELS = { ...OLLAMA_MODELS, }; +/** + * Short-name to full Bedrock model ID mapping. + * Single source of truth — execution files should import this + * instead of maintaining their own inline modelMap. + */ +export const SHORT_NAME_TO_BEDROCK_ID: Record = { + // Claude 4.6 + "claude-opus-4.6": "anthropic.claude-opus-4-6-v1:0", + + // Claude 4.5 + "claude-sonnet-4.5": "anthropic.claude-sonnet-4-5-20250929-v1:0", + "claude-haiku-4.5": "anthropic.claude-haiku-4-5-20251001-v1:0", + + // Amazon Nova + "nova-pro": "us.amazon.nova-pro-v1:0", + "nova-lite": "us.amazon.nova-lite-v1:0", + "nova-micro": "us.amazon.nova-micro-v1:0", + + // Amazon Titan + "titan-text-premier": "amazon.titan-text-premier-v1:0", + "titan-text-express": "amazon.titan-text-express-v1", + "titan-text-lite": "amazon.titan-text-lite-v1", + + // Meta Llama 3.3 + "llama-3.3-70b": "us.meta.llama3-3-70b-instruct-v1:0", + + // Meta Llama 3.2 + "llama-3.2-90b": "us.meta.llama3-2-90b-instruct-v1:0", + "llama-3.2-11b": "us.meta.llama3-2-11b-instruct-v1:0", + "llama-3.2-3b": "us.meta.llama3-2-3b-instruct-v1:0", + "llama-3.2-1b": "us.meta.llama3-2-1b-instruct-v1:0", + + // Mistral + "mistral-large-2": "mistral.mistral-large-2407-v1:0", + "mistral-small": "mistral.mistral-small-2402-v1:0", + "mixtral-8x7b": "mistral.mixtral-8x7b-instruct-v0:1", + + // AI21 Jamba + "jamba-1.5-large": "ai21.jamba-1-5-large-v1:0", + "jamba-1.5-mini": "ai21.jamba-1-5-mini-v1:0", + + // Cohere Command + "command-r-plus": "cohere.command-r-plus-v1:0", + "command-r": "cohere.command-r-v1:0", + + // DeepSeek + "deepseek-r1": "deepseek.r1-v1:0", + "deepseek-v3": "deepseek.v3-v1:0", + "deepseek-v3.1": "deepseek.v3-v1:0", + + // Moonshot Kimi + "kimi-k2": "moonshot.kimi-k2-thinking", +}; + +/** Bedrock provider prefixes used to identify already-qualified model IDs */ +const BEDROCK_PREFIXES = ["anthropic.", "amazon.", "meta.", "mistral.", "cohere.", "ai21.", "stability.", "deepseek.", "moonshot.", "qwen.", "us.", "eu.", "apac.", "global."]; + +/** + * Resolve a model name (short or full) to a valid Bedrock model ID. + * Checks the full registry, then the short-name map, then falls back to env/default. + */ +export function resolveBedrockModelId( modelName: string ): string { + // Already a fully qualified Bedrock ID + if ( BEDROCK_PREFIXES.some( p => modelName.startsWith( p ) ) ) { + return modelName; + } + // Ollama-style ID (has ":" but no Bedrock prefix) + if ( modelName.includes( ":" ) ) { + return modelName; + } + // Check full registry by key + if ( ALL_MODELS[modelName] ) { + return ALL_MODELS[modelName].id; + } + // Check short-name map + if ( SHORT_NAME_TO_BEDROCK_ID[modelName] ) { + return SHORT_NAME_TO_BEDROCK_ID[modelName]; + } + // Fall back to env var or DeepSeek V3.1 default (cheapest capable model on Bedrock) + return process.env.AGENT_BUILDER_MODEL_ID || "deepseek.v3-v1:0"; +} + +/** + * Look up the weighted billing units for a model. + * Returns unitsPerCall from the registry, defaulting to 1 for unknown models. + * Used by stripeMutations to report weighted usage to Stripe. + */ +export function getUnitsForModel( modelId: string ): number { + const model = BEDROCK_MODELS[modelId] ?? ALL_MODELS[modelId]; + return model?.unitsPerCall ?? 1; +} + /** * Get all available models */ -export const getAllModels = query({ +export const getAllModels = query( { args: {}, handler: async () => { - return Object.values(ALL_MODELS); + return Object.values( ALL_MODELS ); }, -}); +} ); /** * Get models by provider */ -export const getModelsByProvider = query({ +export const getModelsByProvider = query( { args: { provider: v.string() }, - handler: async (ctx, args) => { - return Object.values(ALL_MODELS).filter( + handler: async ( ctx, args ) => { + return Object.values( ALL_MODELS ).filter( model => model.provider === args.provider ); }, -}); +} ); /** * Get models by capability */ -export const getModelsByCapability = query({ +export const getModelsByCapability = query( { args: { capability: v.string() }, - handler: async (ctx, args) => { - return Object.values(ALL_MODELS).filter(model => - model.capabilities.includes(args.capability as ModelCapability) + handler: async ( ctx, args ) => { + return Object.values( ALL_MODELS ).filter( model => + model.capabilities.includes( args.capability as ModelCapability ) ); }, -}); +} ); /** * Get recommended models */ -export const getRecommendedModels = query({ +export const getRecommendedModels = query( { args: {}, handler: async () => { - return Object.values(ALL_MODELS).filter(model => model.recommended); + return Object.values( ALL_MODELS ).filter( model => model.recommended ); }, -}); +} ); /** * Get model by ID */ -export const getModelById = query({ +export const getModelById = query( { args: { modelId: v.string() }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { return ALL_MODELS[args.modelId] || null; }, -}); +} ); /** * Get models by category */ -export const getModelsByCategory = query({ +export const getModelsByCategory = query( { args: { category: v.string() }, - handler: async (ctx, args) => { - return Object.values(ALL_MODELS).filter( + handler: async ( ctx, args ) => { + return Object.values( ALL_MODELS ).filter( model => model.category === args.category ); }, -}); +} ); /** * Search models by name or description */ -export const searchModels = query({ +export const searchModels = query( { args: { query: v.string() }, - handler: async (ctx, args) => { + handler: async ( ctx, args ) => { const query = args.query.toLowerCase(); - return Object.values(ALL_MODELS).filter(model => - model.name.toLowerCase().includes(query) || - model.description?.toLowerCase().includes(query) || - model.id.toLowerCase().includes(query) + return Object.values( ALL_MODELS ).filter( model => + model.name.toLowerCase().includes( query ) || + model.description?.toLowerCase().includes( query ) || + model.id.toLowerCase().includes( query ) ); }, -}); +} ); /** * Get model provider-specific configuration */ -export function getModelConfig(modelId: string): { +export function getModelConfig( modelId: string ): { provider: ModelProvider; imports: string[]; initCode: string; } { const model = ALL_MODELS[modelId]; - if (!model) { - throw new Error(`Model ${modelId} not found`); + if ( !model ) { + throw new Error( `Model ${modelId} not found` ); } - if (model.provider === "bedrock") { + if ( model.provider === "bedrock" ) { return { provider: "bedrock", imports: [ @@ -1017,7 +1029,7 @@ model = BedrockModel( }; } - if (model.provider === "ollama") { + if ( model.provider === "ollama" ) { return { provider: "ollama", imports: [ @@ -1032,5 +1044,5 @@ model = OllamaModel( }; } - throw new Error(`Provider ${model.provider} not supported`); + throw new Error( `Provider ${model.provider} not supported` ); } diff --git a/convex/multiAgentRuntime.ts b/convex/multiAgentRuntime.ts new file mode 100644 index 0000000..0a94d28 --- /dev/null +++ b/convex/multiAgentRuntime.ts @@ -0,0 +1,285 @@ +/** + * Multi-Agent Runtime Handler + * + * Handles execution when user agents use swarm, graph, or workflow tools + * that spawn multiple agents simultaneously or sequentially. + * + * INTEGRATION: Now works with swarmTestingOrchestrator for testing + */ + +import { action, internalMutation } from "./_generated/server"; +import { v } from "convex/values"; +import { api, internal } from "./_generated/api"; +import type { Id } from "./_generated/dataModel"; + +/** + * Execute multi-agent pattern (swarm/graph/workflow) + * Called when user's agent invokes swarm, graph, or workflow tools + * + * NOTE: Multi-agent sessions and results tables are not yet defined in schema. + * This is a placeholder implementation. + */ +export const executeMultiAgentPattern = action({ + args: { + parentAgentId: v.id("agents"), + parentConversationId: v.optional(v.id("interleavedConversations")), + pattern: v.union(v.literal("swarm"), v.literal("graph"), v.literal("workflow")), + agents: v.array(v.object({ + agentId: v.id("agents"), + role: v.optional(v.string()), + })), + executionMode: v.union(v.literal("parallel"), v.literal("sequential")), + sharedContext: v.optional(v.any()), + }, + handler: async (ctx: any, args: any): Promise => { + // Auth check: verify caller identity before executing multi-agent patterns + const identity = await ctx.auth.getUserIdentity(); + if (!identity) { + throw new Error("Unauthorized: authentication required to execute multi-agent patterns"); + } + + // Verify caller has access to the parent agent + const parentAgent = await ctx.runQuery(api.agents.get, { id: args.parentAgentId }); + if (!parentAgent) { + throw new Error(`Forbidden: parent agent ${args.parentAgentId} not found or access denied`); + } + + console.log(`Multi-agent execution requested: ${args.pattern} mode with ${args.agents.length} agents`); + + try { + // INTEGRATION: Use swarmTestingOrchestrator for actual execution + if (args.pattern === "swarm") { + // Create swarm from tool invocation + const swarmCreation = await ctx.runAction(internal.swarmTestingOrchestrator.createSwarmFromToolInvocation, { + parentAgentId: args.parentAgentId, + toolInvocation: { + toolName: "swarm", + parameters: { + agents: args.agents, + strategy: args.executionMode, + sharedContext: args.sharedContext, + }, + conversationId: args.parentConversationId, + }, + }); + + if (!swarmCreation.success || !swarmCreation.swarmId) { + throw new Error(`Failed to create swarm: ${swarmCreation.message}`); + } + + // Execute the swarm + const swarmExecution = await ctx.runAction(internal.swarmTestingOrchestrator.executeSwarmFromTool, { + swarmId: swarmCreation.swarmId, + toolInvocation: { + toolName: "swarm", + parameters: { + message: args.sharedContext?.task || "Execute swarm operation", + strategy: args.executionMode, + }, + executionMode: args.executionMode === "parallel" ? "parallel" : + args.executionMode === "sequential" ? "sequential" : "orchestrated", + }, + parentConversationId: args.parentConversationId, + }); + + return { + success: swarmExecution.success, + pattern: args.pattern, + executionMode: args.executionMode, + results: swarmExecution.results, + swarmId: swarmCreation.swarmId, + coordinationLog: swarmExecution.coordinationLog, + executionSummary: swarmExecution.executionSummary, + message: swarmExecution.executionSummary, + }; + } + + // For graph and workflow patterns, use similar approach + if (args.pattern === "graph" || args.pattern === "workflow") { + // Create swarm for graph/workflow execution + const swarmCreation = await ctx.runAction(internal.swarmTestingOrchestrator.createSwarmFromToolInvocation, { + parentAgentId: args.parentAgentId, + toolInvocation: { + toolName: args.pattern, + parameters: { + agents: args.agents, + executionMode: args.executionMode, + sharedContext: args.sharedContext, + }, + conversationId: args.parentConversationId, + }, + }); + + if (!swarmCreation.success || !swarmCreation.swarmId) { + throw new Error(`Failed to create ${args.pattern}: ${swarmCreation.message}`); + } + + // Execute the graph/workflow + const execution = await ctx.runAction(internal.swarmTestingOrchestrator.executeSwarmFromTool, { + swarmId: swarmCreation.swarmId, + toolInvocation: { + toolName: args.pattern, + parameters: args.sharedContext || {}, + executionMode: args.executionMode === "parallel" ? "parallel" : + args.executionMode === "sequential" ? "sequential" : "orchestrated", + }, + parentConversationId: args.parentConversationId, + }); + + return { + success: execution.success, + pattern: args.pattern, + executionMode: args.executionMode, + results: execution.results, + swarmId: swarmCreation.swarmId, + coordinationLog: execution.coordinationLog, + executionSummary: execution.executionSummary, + message: execution.executionSummary, + }; + } + + // Fallback for unsupported patterns + const results = await Promise.all( + args.agents.map(async (agent: any) => ({ + agentId: agent.agentId, + role: agent.role || "agent", + success: true, + result: { message: `${args.pattern} pattern not yet fully implemented` }, + })) + ); + + return { + success: true, + pattern: args.pattern, + executionMode: args.executionMode, + results, + message: `${args.pattern} execution completed with basic implementation`, + }; + + } catch (error: any) { + console.error("Multi-agent execution error:", error); + + // Fallback response + const results = args.agents.map((agent: any) => ({ + agentId: agent.agentId, + role: agent.role || "agent", + success: false, + result: { error: error.message }, + })); + + return { + success: false, + pattern: args.pattern, + executionMode: args.executionMode, + results, + error: error.message, + message: `Multi-agent execution failed: ${error.message}`, + }; + } + }, +}); + +/* +// TODO: Implement when multiAgentSessions and multiAgentResults tables are added to schema +async function executeParallel( + ctx: any, + sessionId: Id<"multiAgentSessions">, + agents: Array<{ agentId: Id<"agents">; role?: string }>, + sharedContext?: any +) { + // Implementation commented out until tables exist + return { success: false, message: "Not implemented" }; +} +*/ + +/* +// TODO: Implement when multiAgentSessions and multiAgentResults tables are added to schema +async function executeSequential( + ctx: any, + sessionId: Id<"multiAgentSessions">, + agents: Array<{ agentId: Id<"agents">; role?: string }>, + sharedContext?: any +) { + // Implementation commented out until tables exist + return { success: false, message: "Not implemented" }; +} +*/ + +// Multi-agent session management mutations (NOW ENABLED with schema tables) + +export const createSession = internalMutation({ + args: { + parentAgentId: v.id("agents"), + parentConversationId: v.optional(v.id("interleavedConversations")), + pattern: v.string(), + executionMode: v.string(), + agentIds: v.array(v.id("agents")), + }, + handler: async (ctx, args) => { + return await ctx.db.insert("multiAgentSessions", { + parentAgentId: args.parentAgentId, + parentConversationId: args.parentConversationId, + pattern: args.pattern, + executionMode: args.executionMode, + agentIds: args.agentIds, + status: "running", + startedAt: Date.now(), + }); + }, +}); + +export const createAgentConversation = internalMutation({ + args: { + sessionId: v.id("multiAgentSessions"), + agentId: v.id("agents"), + role: v.optional(v.string()), + }, + handler: async (ctx, args) => { + return await ctx.db.insert("interleavedConversations", { + agentId: args.agentId, + title: `Multi-Agent Session - ${args.role || "Agent"}`, + systemPrompt: "", + contextSize: 0, + createdAt: Date.now(), + updatedAt: Date.now(), + isActive: true, + }); + }, +}); + +export const recordAgentResult = internalMutation({ + args: { + sessionId: v.id("multiAgentSessions"), + agentId: v.id("agents"), + conversationId: v.id("interleavedConversations"), + result: v.any(), + status: v.string(), + startedAt: v.optional(v.number()), + }, + handler: async (ctx, args) => { + const now = Date.now(); + await ctx.db.insert("multiAgentResults", { + sessionId: args.sessionId, + agentId: args.agentId, + conversationId: args.conversationId, + result: args.result, + status: args.status, + startedAt: args.startedAt ?? now, + completedAt: args.status === "running" ? undefined : now, + }); + }, +}); + +export const completeSession = internalMutation({ + args: { + sessionId: v.id("multiAgentSessions"), + results: v.any(), + }, + handler: async (ctx, args) => { + await ctx.db.patch(args.sessionId, { + status: "completed", + completedAt: Date.now(), + result: args.results, + }); + }, +}); diff --git a/convex/multiEnvironmentExecution.test.ts b/convex/multiEnvironmentExecution.test.ts deleted file mode 100644 index 7e84227..0000000 --- a/convex/multiEnvironmentExecution.test.ts +++ /dev/null @@ -1,629 +0,0 @@ -/** - * Multi-Environment Execution Tests - * - * Tests agent execution across different environments: - * 1. Docker/ECS Fargate (for Ollama models) - * 2. AWS Bedrock AgentCore (for Bedrock models) - * 3. Local testing environment - * - * Validates: - * - Model configuration correctness - * - Environment variable access - * - Consistent results across environments - * - MCP tool integration - * - Agent-as-tool functionality - * - * Requirements: 7.1-7.7 - */ - -import { convexTest } from "convex-test"; -import { expect, test, describe, beforeEach, afterEach } from "vitest"; -import schema from "./schema"; -import { api, internal } from "./_generated/api"; -import { Id } from "./_generated/dataModel"; -import { BEDROCK_MODELS, OLLAMA_MODELS } from "./modelRegistry"; - -const modules = import.meta.glob("./**/*.ts"); - -describe("Multi-Environment Agent Execution", () => { - let t: any; - let testUserId: Id<"users">; - - beforeEach(async () => { - t = convexTest(schema, modules); - - // Create test user - testUserId = await t.run(async (ctx: any) => { - return await ctx.db.insert("users", { - userId: "test-user-multi-env", - email: "test@multienv.com", - name: "Multi-Env Test User", - tier: "personal", - testsThisMonth: 0, - createdAt: Date.now(), - isAnonymous: false, - }); - }); - - t = t.withIdentity({ subject: testUserId }); - }); - - afterEach(async () => { - // Cleanup test data - if (t && testUserId) { - await t.run(async (ctx: any) => { - // Delete test executions - const tests = await ctx.db - .query("testExecutions") - .withIndex("by_user", (q: any) => q.eq("userId", testUserId)) - .collect(); - - for (const test of tests) { - await ctx.db.delete(test._id); - } - - // Delete agents - const agents = await ctx.db - .query("agents") - .filter((q: any) => q.eq(q.field("createdBy"), testUserId)) - .collect(); - - for (const agent of agents) { - await ctx.db.delete(agent._id); - } - - // Delete user - await ctx.db.delete(testUserId); - }); - } - }); - - describe("Docker Environment (Ollama Models)", () => { - test("should execute agent with Llama 3.3 model in Docker", async () => { - // Requirement 7.1: Docker environment execution - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "Llama Docker Agent", - description: "Test agent using Llama 3.3", - model: "llama3.3", - systemPrompt: "You are a helpful assistant.", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass LlamaAgent(Agent):\n pass`, - deploymentType: "ollama", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "What is 2 + 2?", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("ollama"); - expect(test.modelConfig.testEnvironment).toBe("docker"); - expect(test.modelConfig.modelId).toBe("llama3.3"); - expect(test.modelConfig.baseUrl).toBeDefined(); - expect(test.dockerfile).toContain("python:3.11"); - expect(test.requirements).toContain("ollama"); - }); - - test("should execute agent with Qwen3 Coder model in Docker", async () => { - // Requirement 7.1: Docker environment with coding model - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "Qwen Coder Agent", - description: "Test agent using Qwen3 Coder", - model: "qwen3-coder:30b", - systemPrompt: "You are a coding assistant.", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass QwenCoderAgent(Agent):\n pass`, - deploymentType: "ollama", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "Write a Python function to calculate fibonacci", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("ollama"); - expect(test.modelConfig.modelId).toBe("qwen3-coder:30b"); - expect(test.modelConfig.testEnvironment).toBe("docker"); - }); - - test("should execute agent with DeepSeek R1 reasoning model in Docker", async () => { - // Requirement 7.1: Docker environment with reasoning model - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "DeepSeek Reasoning Agent", - description: "Test agent using DeepSeek R1", - model: "deepseek-r1:8b", - systemPrompt: "You are a reasoning assistant.", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass DeepSeekAgent(Agent):\n pass`, - deploymentType: "ollama", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "Solve this logic puzzle: If all A are B, and all B are C, what can we conclude?", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("ollama"); - expect(test.modelConfig.modelId).toBe("deepseek-r1:8b"); - expect(test.modelConfig.testEnvironment).toBe("docker"); - }); - }); - - describe("AgentCore Environment (Bedrock Models)", () => { - test("should execute agent with Claude 4.5 Sonnet in AgentCore", async () => { - // Requirement 7.2: AgentCore environment execution - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "Claude Sonnet Agent", - description: "Test agent using Claude 4.5 Sonnet", - model: "anthropic.claude-sonnet-4-5-20250929-v1:0", - systemPrompt: "You are a helpful assistant.", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass ClaudeAgent(Agent):\n pass`, - deploymentType: "bedrock", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "Explain quantum computing", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("bedrock"); - expect(test.modelConfig.testEnvironment).toBe("agentcore"); - expect(test.modelConfig.modelId).toBe("anthropic.claude-sonnet-4-5-20250929-v1:0"); - expect(test.modelConfig.region).toBeDefined(); - expect(test.dockerfile).toContain("--platform=linux/arm64"); - expect(test.dockerfile).toContain("agentcore_server.py"); - expect(test.requirements).toContain("bedrock-agentcore"); - }); - - test("should execute agent with Claude 4.5 Haiku in AgentCore", async () => { - // Requirement 7.2: AgentCore with fast model - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "Claude Haiku Agent", - description: "Test agent using Claude 4.5 Haiku", - model: "anthropic.claude-haiku-4-5-20250514-v1:0", - systemPrompt: "You are a fast assistant.", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass ClaudeHaikuAgent(Agent):\n pass`, - deploymentType: "bedrock", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "Quick summary of photosynthesis", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("bedrock"); - expect(test.modelConfig.modelId).toBe("anthropic.claude-haiku-4-5-20250514-v1:0"); - expect(test.modelConfig.testEnvironment).toBe("agentcore"); - }); - - test("should execute agent with Amazon Nova Pro in AgentCore", async () => { - // Requirement 7.2: AgentCore with Amazon model - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "Nova Pro Agent", - description: "Test agent using Amazon Nova Pro", - model: "amazon.nova-pro-v1:0", - systemPrompt: "You are a helpful assistant.", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass NovaAgent(Agent):\n pass`, - deploymentType: "bedrock", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "Describe machine learning", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("bedrock"); - expect(test.modelConfig.modelId).toBe("amazon.nova-pro-v1:0"); - expect(test.modelConfig.testEnvironment).toBe("agentcore"); - }); - }); - - describe("Environment Variable Access", () => { - test("should include environment variables in Docker execution", async () => { - // Requirement 7.5: Environment variable access - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "EnvVar Docker Agent", - description: "Test agent with environment variables", - model: "llama3.3", - systemPrompt: "You are a helpful assistant.", - tools: [], - generatedCode: `from strands_agents import Agent\nimport os\n\nclass EnvVarAgent(Agent):\n def __init__(self):\n # Environment variables would be set in container\n self.api_key = os.getenv('API_KEY', 'default')\n self.log_level = os.getenv('LOG_LEVEL', 'INFO')\n pass`, - deploymentType: "ollama", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "test", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - // Verify environment variables are included in model config - expect(test.modelConfig).toBeDefined(); - expect(test.modelConfig.baseUrl).toBeDefined(); - expect(test.modelConfig.testEnvironment).toBe("docker"); - }); - - test("should include AWS environment variables in AgentCore execution", async () => { - // Requirement 7.5: AWS environment variables - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "EnvVar Bedrock Agent", - description: "Test agent with AWS environment variables", - model: "anthropic.claude-sonnet-4-5-20250929-v1:0", - systemPrompt: "You are a helpful assistant.", - tools: [], - generatedCode: `from strands_agents import Agent\nimport os\n\nclass AWSEnvAgent(Agent):\n def __init__(self):\n # AWS environment variables would be set in AgentCore\n self.aws_region = os.getenv('AWS_REGION', 'us-east-1')\n self.log_level = os.getenv('LOG_LEVEL', 'INFO')\n pass`, - deploymentType: "bedrock", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "test", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - // Verify AWS environment variables are included - expect(test.modelConfig.region).toBeDefined(); - expect(test.modelConfig.testEnvironment).toBe("agentcore"); - }); - }); - - describe("Model Configuration Validation", () => { - test("should validate all Bedrock model IDs are correctly configured", async () => { - // Requirement 7.7: Model ID validation - const bedrockModelIds = Object.keys(BEDROCK_MODELS); - - // Test a sample of models - const sampleModels = [ - "anthropic.claude-sonnet-4-5-20250929-v1:0", - "anthropic.claude-haiku-4-5-20250514-v1:0", - "amazon.nova-pro-v1:0", - "anthropic.claude-3-5-haiku-20241022-v1:0", - ]; - - for (const modelId of sampleModels) { - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: `Test Agent ${modelId}`, - description: "Model validation test", - model: modelId, - systemPrompt: "Test", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass TestAgent(Agent):\n pass`, - deploymentType: "bedrock", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "test", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("bedrock"); - expect(test.modelConfig.modelId).toBe(modelId); - expect(test.modelConfig.testEnvironment).toBe("agentcore"); - expect(BEDROCK_MODELS[modelId]).toBeDefined(); - } - }); - - test("should validate all Ollama model IDs are correctly configured", async () => { - // Requirement 7.7: Ollama model ID validation - const ollamaModelIds = Object.keys(OLLAMA_MODELS); - - // Test a sample of models - const sampleModels = [ - "llama3.3", - "qwen3:8b", - "phi4:14b", - "deepseek-r1:8b", - ]; - - for (const modelId of sampleModels) { - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: `Test Agent ${modelId}`, - description: "Model validation test", - model: modelId, - systemPrompt: "Test", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass TestAgent(Agent):\n pass`, - deploymentType: "ollama", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "test", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("ollama"); - expect(test.modelConfig.modelId).toBe(modelId); - expect(test.modelConfig.testEnvironment).toBe("docker"); - expect(OLLAMA_MODELS[modelId]).toBeDefined(); - } - }); - }); - - describe("Cross-Environment Consistency", () => { - test("should produce consistent agent structure across environments", async () => { - // Requirement 7.4: Consistent results across environments - - // Create Ollama agent - const ollamaAgentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "Consistency Test Ollama", - description: "Test consistency", - model: "llama3.3", - systemPrompt: "You are a helpful assistant. Always respond with 'Hello, World!'", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass ConsistencyAgent(Agent):\n pass`, - deploymentType: "ollama", - createdBy: testUserId, - isPublic: false, - }); - }); - - // Create Bedrock agent with same prompt - const bedrockAgentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "Consistency Test Bedrock", - description: "Test consistency", - model: "anthropic.claude-sonnet-4-5-20250929-v1:0", - systemPrompt: "You are a helpful assistant. Always respond with 'Hello, World!'", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass ConsistencyAgent(Agent):\n pass`, - deploymentType: "bedrock", - createdBy: testUserId, - isPublic: false, - }); - }); - - // Submit tests - const ollamaResult = await t.mutation(api.testExecution.submitTest, { - agentId: ollamaAgentId, - testQuery: "Say hello", - }); - - const bedrockResult = await t.mutation(api.testExecution.submitTest, { - agentId: bedrockAgentId, - testQuery: "Say hello", - }); - - // Get test configurations - const ollamaTest = await t.query(api.testExecution.getTestById, { - testId: ollamaResult.testId, - }); - - const bedrockTest = await t.query(api.testExecution.getTestById, { - testId: bedrockResult.testId, - }); - - // Verify both have proper structure - expect(ollamaTest.agentCode).toBeDefined(); - expect(bedrockTest.agentCode).toBeDefined(); - expect(ollamaTest.requirements).toBeDefined(); - expect(bedrockTest.requirements).toBeDefined(); - expect(ollamaTest.dockerfile).toBeDefined(); - expect(bedrockTest.dockerfile).toBeDefined(); - - // Verify environment-specific differences - expect(ollamaTest.modelProvider).toBe("ollama"); - expect(bedrockTest.modelProvider).toBe("bedrock"); - expect(ollamaTest.modelConfig.testEnvironment).toBe("docker"); - expect(bedrockTest.modelConfig.testEnvironment).toBe("agentcore"); - }); - }); - - describe("MCP Tool Integration in Multi-Environment", () => { - test("should execute agent with MCP tools in Docker environment", async () => { - // Requirement 7.3: MCP integration in Docker - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "MCP Docker Agent", - description: "Test agent with MCP tools", - model: "llama3.3", - systemPrompt: "You are a helpful assistant with tools.", - tools: [ - { name: "search", type: "search", config: {} }, - { name: "calculator", type: "calculator", config: {} }, - ], - generatedCode: `from strands_agents import Agent\n\nclass MCPAgent(Agent):\n pass`, - deploymentType: "ollama", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "Search for Python tutorials", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("ollama"); - expect(test.modelConfig.testEnvironment).toBe("docker"); - // Tools should be included in requirements - expect(test.requirements).toContain("strands-agents-tools"); - }); - - test("should execute agent with MCP tools in AgentCore environment", async () => { - // Requirement 7.3: MCP integration in AgentCore - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "MCP Bedrock Agent", - description: "Test agent with MCP tools", - model: "anthropic.claude-sonnet-4-5-20250929-v1:0", - systemPrompt: "You are a helpful assistant with tools.", - tools: [ - { name: "file_read", type: "file_read", config: {} }, - { name: "http_request", type: "http_request", config: {} }, - ], - generatedCode: `from strands_agents import Agent\n\nclass MCPAgent(Agent):\n pass`, - deploymentType: "bedrock", - createdBy: testUserId, - isPublic: false, - }); - }); - - const result = await t.mutation(api.testExecution.submitTest, { - agentId, - testQuery: "Read a file", - }); - - const test = await t.query(api.testExecution.getTestById, { - testId: result.testId, - }); - - expect(test.modelProvider).toBe("bedrock"); - expect(test.modelConfig.testEnvironment).toBe("agentcore"); - expect(test.requirements).toContain("strands-agents-tools"); - }); - }); - - describe("Agent-as-Tool in Multi-Environment", () => { - test("should expose Docker agent as MCP tool", async () => { - // Requirement 7.6: Agent-as-tool in Docker - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "Tool Docker Agent", - description: "Agent exposed as tool", - model: "llama3.3", - systemPrompt: "You are a calculator agent.", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass ToolAgent(Agent):\n pass`, - deploymentType: "ollama", - exposableAsMCPTool: true, - mcpToolName: "docker_calculator", - mcpInputSchema: { - type: "object", - properties: { - expression: { type: "string" }, - }, - }, - createdBy: testUserId, - isPublic: false, - }); - }); - - const agent = await t.run(async (ctx: any) => { - return await ctx.db.get(agentId); - }); - - expect(agent.exposableAsMCPTool).toBe(true); - expect(agent.mcpToolName).toBe("docker_calculator"); - expect(agent.deploymentType).toBe("ollama"); - }); - - test("should expose AgentCore agent as MCP tool", async () => { - // Requirement 7.6: Agent-as-tool in AgentCore - const agentId = await t.run(async (ctx: any) => { - return await ctx.db.insert("agents", { - name: "Tool Bedrock Agent", - description: "Agent exposed as tool", - model: "anthropic.claude-sonnet-4-5-20250929-v1:0", - systemPrompt: "You are a summarization agent.", - tools: [], - generatedCode: `from strands_agents import Agent\n\nclass ToolAgent(Agent):\n pass`, - deploymentType: "bedrock", - exposableAsMCPTool: true, - mcpToolName: "bedrock_summarizer", - mcpInputSchema: { - type: "object", - properties: { - text: { type: "string" }, - }, - }, - createdBy: testUserId, - isPublic: false, - }); - }); - - const agent = await t.run(async (ctx: any) => { - return await ctx.db.get(agentId); - }); - - expect(agent.exposableAsMCPTool).toBe(true); - expect(agent.mcpToolName).toBe("bedrock_summarizer"); - expect(agent.deploymentType).toBe("bedrock"); - }); - }); -}); diff --git a/convex/ollamaInstaller.ts b/convex/ollamaInstaller.ts new file mode 100644 index 0000000..85ce2d1 --- /dev/null +++ b/convex/ollamaInstaller.ts @@ -0,0 +1,231 @@ +"use node"; + +/** + * Ollama Auto-Installer + * Downloads and installs Ollama for users automatically + */ + +import { action } from "./_generated/server"; +import { v } from "convex/values"; + +interface InstallationStep { + step: string; + status: "pending" | "in_progress" | "completed" | "failed"; + progress: number; + message: string; +} + +/** + * Get Ollama installer URL based on platform + */ +export const getInstallerInfo = action({ + args: { + platform: v.union(v.literal("windows"), v.literal("macos"), v.literal("linux")), + }, + handler: async (_ctx, { platform }) => { + const installers = { + windows: { + url: "https://ollama.com/download/OllamaSetup.exe", + filename: "OllamaSetup.exe", + size: "45MB", + instructions: [ + "Download will start automatically", + "Run OllamaSetup.exe when complete", + "Follow installation wizard", + "Ollama will start automatically after install", + ], + }, + macos: { + url: "https://ollama.com/download/Ollama-darwin.zip", + filename: "Ollama-darwin.zip", + size: "40MB", + instructions: [ + "Download will start automatically", + "Extract the .zip file", + "Drag Ollama.app to Applications folder", + "Open Ollama from Applications", + ], + }, + linux: { + url: "https://ollama.com/install.sh", + filename: "install.sh", + size: "1KB", + command: "curl -fsSL https://ollama.com/install.sh | sh", + instructions: [ + "Open Terminal", + "Run: curl -fsSL https://ollama.com/install.sh | sh", + "Script will install Ollama automatically", + "Ollama will be available as 'ollama' command", + ], + }, + }; + + return installers[platform]; + }, +}); + +/** + * Guide user through Ollama installation + */ +export const generateInstallGuide = action({ + args: { + platform: v.union(v.literal("windows"), v.literal("macos"), v.literal("linux")), + }, + handler: async (_ctx, { platform }) => { + const downloadUrls = { + windows: "https://ollama.com/download/OllamaSetup.exe", + macos: "https://ollama.com/download/Ollama-darwin.zip", + linux: "https://ollama.com/install.sh", + }; + + const guides = { + windows: ` +# Install Ollama on Windows + +## Step 1: Download Ollama +Visit: https://ollama.com/download +Or direct download: https://ollama.com/download/OllamaSetup.exe + +## Step 2: Run Installer +1. Double-click OllamaSetup.exe +2. Click "Yes" if Windows asks for permission +3. Follow the installation wizard +4. Ollama will start automatically + +## Step 3: Verify Installation +Open Command Prompt and run: +\`\`\` +ollama --version +\`\`\` + +## Step 4: Pull Your First Model +\`\`\` +ollama pull llama3.2:3b +\`\`\` + +## Done! +Ollama is now running at http://127.0.0.1:11434 + `, + + macos: ` +# Install Ollama on macOS + +## Step 1: Download Ollama +Visit: https://ollama.com/download +Or direct download: https://ollama.com/download/Ollama-darwin.zip + +## Step 2: Install +1. Open the downloaded .zip file +2. Drag Ollama to your Applications folder +3. Open Ollama from Applications +4. Ollama will appear in your menu bar + +## Step 3: Verify Installation +Open Terminal and run: +\`\`\`bash +ollama --version +\`\`\` + +## Step 4: Pull Your First Model +\`\`\`bash +ollama pull llama3.2:3b +\`\`\` + +## Done! +Ollama is now running at http://127.0.0.1:11434 + `, + + linux: ` +# Install Ollama on Linux + +## Step 1: Install with Script +Open Terminal and run: +\`\`\`bash +curl -fsSL https://ollama.com/install.sh | sh +\`\`\` + +This will: +- Download Ollama +- Install it to /usr/local/bin +- Start the Ollama service + +## Step 2: Verify Installation +\`\`\`bash +ollama --version +\`\`\` + +## Step 3: Pull Your First Model +\`\`\`bash +ollama pull llama3.2:3b +\`\`\` + +## Done! +Ollama is now running at http://127.0.0.1:11434 + `, + }; + + return { + platform, + guide: guides[platform], + downloadUrl: downloadUrls[platform], + }; + }, +}); + +/** + * Get recommended Ollama models for agent testing + */ +export const getRecommendedModels = action({ + args: {}, + handler: async () => { + return [ + { + name: "llama3.2:1b", + size: "1.3GB", + ram: "2GB", + speed: "Very Fast", + quality: "Good", + description: "Fastest option, good for testing", + command: "ollama pull llama3.2:1b", + recommended: true, + }, + { + name: "llama3.2:3b", + size: "2.0GB", + ram: "3GB", + speed: "Fast", + quality: "Very Good", + description: "Best balance of speed and quality", + command: "ollama pull llama3.2:3b", + recommended: true, + }, + { + name: "llama3.1:8b", + size: "4.7GB", + ram: "6GB", + speed: "Medium", + quality: "Excellent", + description: "High quality, slower", + command: "ollama pull llama3.1:8b", + }, + { + name: "qwen2.5-coder:7b", + size: "4.0GB", + ram: "5GB", + speed: "Medium", + quality: "Excellent", + description: "Specialized for code generation", + command: "ollama pull qwen2.5-coder:7b", + }, + { + name: "mistral:7b", + size: "4.1GB", + ram: "5GB", + speed: "Medium", + quality: "Excellent", + description: "Alternative general-purpose model", + command: "ollama pull mistral:7b", + }, + ]; + }, +}); diff --git a/convex/ollamaMCPIntegration.ts b/convex/ollamaMCPIntegration.ts new file mode 100644 index 0000000..fccfe99 --- /dev/null +++ b/convex/ollamaMCPIntegration.ts @@ -0,0 +1,338 @@ +"use node"; + +/** + * Ollama Direct API Integration for Agent Testing + * + * Calls Ollama HTTP API directly (no MCP server needed) + * Provides FREE, unlimited testing for local models + */ + +import { action } from "./_generated/server"; +import { v } from "convex/values"; +import { internal } from "./_generated/api"; + +/** Resolve the Ollama endpoint once from env (falls back to localhost) */ +function getOllamaEndpoint(): string { + return process.env.OLLAMA_ENDPOINT || "http://127.0.0.1:11434"; +} + +/** + * Test agent with Ollama model via MCP + */ +export const testAgentWithOllama = action({ + args: { + agentId: v.id("agents"), + modelName: v.string(), // e.g., "llama3.2:3b" + testMessage: v.string(), + }, + handler: async (ctx, { agentId, modelName, testMessage }) => { + // Get agent configuration + const agent = await ctx.runQuery(internal.agents.getInternal, { id: agentId }); + + if (!agent) { + throw new Error("Agent not found"); + } + + // Call Ollama via MCP + const ollamaResponse = await callOllamaMCP({ + model: modelName, + prompt: testMessage, + systemPrompt: agent.systemPrompt || "", + }); + + return { + success: true, + model: modelName, + response: ollamaResponse, + agentId, + }; + }, +}); + +/** + * List available Ollama models via MCP + */ +export const listOllamaModels = action({ + args: {}, + handler: async (ctx) => { + try { + const endpoint = getOllamaEndpoint(); + const response = await fetch(`${endpoint}/api/tags`); + + if (!response.ok) { + return { + success: false, + error: "Ollama not running or not accessible", + models: [], + }; + } + + const data = await response.json(); + + return { + success: true, + models: data.models || [], + }; + } catch (error: any) { + return { + success: false, + error: error.message, + models: [], + }; + } + }, +}); + +/** + * Check if Ollama is running + */ +export const checkOllamaStatus = action({ + args: {}, + handler: async (ctx) => { + const endpoint = getOllamaEndpoint(); + try { + const response = await fetch(`${endpoint}/api/tags`); + + return { + running: response.ok, + endpoint, + }; + } catch (error) { + return { + running: false, + endpoint, + error: "Ollama not accessible", + }; + } + }, +}); + +/** + * Execute chat completion with Ollama model + */ +export const chatWithOllama = action({ + args: { + model: v.string(), + messages: v.array(v.object({ + role: v.string(), + content: v.string(), + })), + stream: v.optional(v.boolean()), + }, + handler: async (ctx, { model, messages, stream = false }) => { + try { + const endpoint = getOllamaEndpoint(); + const response = await fetch(`${endpoint}/api/chat`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model, + messages, + stream, + }), + }); + + if (!response.ok) { + throw new Error(`Ollama API error: ${response.statusText}`); + } + + const data = await response.json(); + + return { + success: true, + message: data.message, + model, + done: data.done, + }; + } catch (error: any) { + return { + success: false, + error: error.message, + }; + } + }, +}); + +/** + * Execute test with Ollama model - Main entry point for test execution + * This is called by testExecution.ts when an Ollama model is detected + */ +export const executeOllamaTest = action({ + args: { + testId: v.id("testExecutions"), + agentCode: v.string(), + testQuery: v.string(), + model: v.string(), + systemPrompt: v.optional(v.string()), + timeout: v.optional(v.number()), + }, + handler: async (ctx, args) => { + const startTime = Date.now(); + const endpoint = getOllamaEndpoint(); + + try { + // Update test status to RUNNING + await ctx.runMutation(internal.testExecution.updateStatus, { + testId: args.testId, + status: "RUNNING", + }); + + // Add log + await ctx.runMutation(internal.testExecution.appendLogs, { + testId: args.testId, + logs: [ + `[${new Date().toISOString()}] Starting Ollama test with model: ${args.model}`, + `[${new Date().toISOString()}] Connecting to Ollama at ${endpoint}`, + ], + timestamp: Date.now(), + }); + + // Build messages + const messages: Array<{ role: string; content: string }> = []; + + if (args.systemPrompt) { + messages.push({ + role: "system", + content: args.systemPrompt, + }); + } + + messages.push({ + role: "user", + content: args.testQuery, + }); + + // Call Ollama HTTP API + const response = await fetch(`${endpoint}/api/chat`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: args.model, + messages, + stream: false, + options: { + temperature: 0.7, + }, + }), + signal: AbortSignal.timeout(args.timeout || 180000), + }); + + if (!response.ok) { + throw new Error(`Ollama API error: ${response.status} ${response.statusText}`); + } + + const data = await response.json(); + const executionTime = Date.now() - startTime; + + // Add success log + await ctx.runMutation(internal.testExecution.appendLogs, { + testId: args.testId, + logs: [ + `[${new Date().toISOString()}] Ollama response received`, + `[${new Date().toISOString()}] Execution time: ${executionTime}ms`, + `[${new Date().toISOString()}] Tokens - Prompt: ${data.prompt_eval_count || 0}, Completion: ${data.eval_count || 0}`, + ], + timestamp: Date.now(), + }); + + // Update test status to COMPLETED + await ctx.runMutation(internal.testExecution.updateStatus, { + testId: args.testId, + status: "COMPLETED", + success: true, + response: data.message?.content || "", + }); + + return { + success: true, + response: data.message?.content || "", + model: args.model, + provider: "ollama", + cost: 0, // FREE! + executionTime, + tokens: { + prompt: data.prompt_eval_count || 0, + completion: data.eval_count || 0, + total: (data.prompt_eval_count || 0) + (data.eval_count || 0), + }, + }; + } catch (error: any) { + const executionTime = Date.now() - startTime; + + // Log error + await ctx.runMutation(internal.testExecution.appendLogs, { + testId: args.testId, + logs: [ + `[${new Date().toISOString()}] ERROR: ${error.message}`, + `[${new Date().toISOString()}] Execution time: ${executionTime}ms`, + ], + timestamp: Date.now(), + }); + + // Update test status to FAILED + await ctx.runMutation(internal.testExecution.updateStatus, { + testId: args.testId, + status: "FAILED", + success: false, + error: error.message, + errorStage: "ollama_execution", + }); + + return { + success: false, + error: error.message, + model: args.model, + provider: "ollama", + executionTime, + }; + } + }, +}); + +/** + * Helper function to call Ollama API + * Used by testAgentWithOllama for backwards compatibility + */ +async function callOllamaMCP(params: { + model: string; + prompt: string; + systemPrompt?: string; +}) { + const messages = []; + + if (params.systemPrompt) { + messages.push({ + role: "system", + content: params.systemPrompt, + }); + } + + messages.push({ + role: "user", + content: params.prompt, + }); + + const endpoint = getOllamaEndpoint(); + const response = await fetch(`${endpoint}/api/chat`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model: params.model, + messages, + stream: false, + }), + }); + + if (!response.ok) { + throw new Error(`Ollama API error: ${response.statusText}`); + } + + const data = await response.json(); + return data.message?.content || ""; +} diff --git a/convex/platformValue.ts b/convex/platformValue.ts new file mode 100644 index 0000000..cb74cb7 --- /dev/null +++ b/convex/platformValue.ts @@ -0,0 +1,62 @@ +/** + * Platform Value Calculator + * Shows users what they get for free vs building themselves + */ + +import { query } from "./_generated/server"; +import { v } from "convex/values"; +import { getAuthUserId } from "@convex-dev/auth/server"; + +export const calculatePlatformValue = query({ + args: { agentId: v.id("agents") }, + handler: async (ctx, args) => { + const userId = await getAuthUserId(ctx); + const agent = await ctx.db.get(args.agentId); + if (!agent) return null; + + // Only allow owner or public agents + const isOwner = userId && agent.createdBy === userId; + const isPublic = (agent as any).isPublic === true; + if (!isOwner && !isPublic) return null; + + const toolCount = agent.tools?.length || 0; + const mcpCount = 11; // Built-in MCP servers + + return { + // What user gets for FREE + included: { + infrastructure: { + value: 2000, + items: ["VPC setup", "ECS Fargate config", "ALB", "Security groups", "IAM roles"] + }, + memory: { + value: 1500, + items: ["STM/LTM hybrid", "DynamoDB indexing", "S3 storage", "Auto-routing"] + }, + tools: { + value: toolCount * 100, + items: [`${toolCount} pre-integrated tools`, "No setup required", "Tested & working"] + }, + mcp: { + value: mcpCount * 200, + items: [`${mcpCount} MCP servers`, "Pre-configured", "AgentCore integration"] + }, + ui: { + value: 3000, + items: ["Three chat system", "Agent builder UI", "Test interface", "Monitoring panel"] + }, + monitoring: { + value: 1000, + items: ["CloudWatch logs", "X-Ray tracing", "OTEL instrumentation", "Audit logs"] + }, + deployment: { + value: 1500, + items: ["One-click deploy", "Docker automation", "ECR management", "Zero DevOps"] + } + }, + totalValue: 2000 + 1500 + (toolCount * 100) + (mcpCount * 200) + 3000 + 1000 + 1500, + timeToReplicateHours: 120, // 3 weeks of work + message: "All included in your tier - no additional cost" + }; + } +}); diff --git a/convex/promptChainExecutor.ts b/convex/promptChainExecutor.ts new file mode 100644 index 0000000..fafabc2 --- /dev/null +++ b/convex/promptChainExecutor.ts @@ -0,0 +1,652 @@ +/** + * Prompt Chain Executor + * + * Executes chains of prompts sequentially or in parallel + * Works with manual prompts in visual scripting tool + */ + +"use node"; + +import { action } from "./_generated/server"; +import { internal } from "./_generated/api"; +import { v } from "convex/values"; + +/** + * Execute a chain of prompts sequentially + */ +export const executePromptChain = action({ + args: { + prompts: v.array( + v.object({ + id: v.string(), + template: v.string(), + variables: v.optional(v.any()), + model: v.string(), // "ollama:llama3.2:3b" or "bedrock:claude-3-5-haiku" + extractOutput: v.optional(v.string()), // JSONPath or regex + }) + ), + initialInput: v.any(), + passThroughContext: v.boolean(), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + finalOutput: any; + intermediateResults: Array<{ + promptId: string; + prompt: string; + response: string; + extracted: any; + latency: number; + }>; + totalLatency: number; + error?: string; + }> => { + // Gate: enforce tier-based Bedrock access if any prompt uses a Bedrock model + const hasBedrock = args.prompts.some( ( p ) => p.model.startsWith( "bedrock:" ) ); + let gateUserId: any = null; + let gateModelId: string | undefined; + if ( hasBedrock ) { + const { requireBedrockAccess } = await import( "./lib/bedrockGate" ); + const bedrockModel = args.prompts.find( ( p ) => p.model.startsWith( "bedrock:" ) ); + gateModelId = bedrockModel?.model.substring( "bedrock:".length ); + const gateResult = await requireBedrockAccess( + ctx, gateModelId, + async ( lookupArgs ) => ctx.runQuery( internal.users.getInternal, lookupArgs ), + ); + if ( !gateResult.allowed ) { + return { + success: false, + finalOutput: null, + intermediateResults: [], + totalLatency: 0, + error: gateResult.reason, + }; + } + gateUserId = gateResult.userId; + } + + const startTime = Date.now(); + const intermediateResults = []; + let context = args.initialInput; + let totalInputTokens = 0; + let totalOutputTokens = 0; + + try { + for (const promptConfig of args.prompts) { + const promptStartTime = Date.now(); + + // Render template with current context + const renderedPrompt = renderTemplate(promptConfig.template, { + ...promptConfig.variables, + ...context, + }); + + // Execute prompt with specified model + const modelResult = await invokeModel( + promptConfig.model, + renderedPrompt, + ctx + ); + const response = modelResult.text; + totalInputTokens += modelResult.inputTokens; + totalOutputTokens += modelResult.outputTokens; + + // Extract output if specified + let extracted: any = response; + if (promptConfig.extractOutput) { + extracted = extractValue(response, promptConfig.extractOutput); + } + + const latency = Date.now() - promptStartTime; + + intermediateResults.push({ + promptId: promptConfig.id, + prompt: renderedPrompt, + response, + extracted, + latency, + }); + + // Update context for next prompt + if (args.passThroughContext) { + if (typeof extracted === "object" && extracted !== null && !Array.isArray(extracted)) { + context = { ...context, ...(extracted as Record) }; + } else { + context = { ...context, result: extracted }; + } + } else { + context = extracted; + } + } + + // Meter: token-based billing for the entire chain + if ( gateUserId && ( totalInputTokens > 0 || totalOutputTokens > 0 ) ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateUserId, + modelId: gateModelId, + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + } ); + } + + const totalLatency = Date.now() - startTime; + + return { + success: true, + finalOutput: context, + intermediateResults, + totalLatency, + }; + } catch (error: any) { + return { + success: false, + finalOutput: null, + intermediateResults, + totalLatency: Date.now() - startTime, + error: error.message || "Prompt chain execution failed", + }; + } + }, +}); + +/** + * Execute prompts in parallel + */ +export const executeParallelPrompts = action({ + args: { + prompts: v.array( + v.object({ + id: v.string(), + template: v.string(), + variables: v.optional(v.any()), + model: v.string(), + }) + ), + sharedContext: v.any(), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + results: Array<{ + promptId: string; + response: string; + latency: number; + error?: string; + }>; + totalLatency: number; + }> => { + // Gate: enforce tier-based Bedrock access if any prompt uses a Bedrock model + const hasBedrock = args.prompts.some( ( p ) => p.model.startsWith( "bedrock:" ) ); + let gateUserId: any = null; + let gateModelId: string | undefined; + if ( hasBedrock ) { + const { requireBedrockAccess } = await import( "./lib/bedrockGate" ); + const bedrockModel = args.prompts.find( ( p ) => p.model.startsWith( "bedrock:" ) ); + gateModelId = bedrockModel?.model.substring( "bedrock:".length ); + const gateResult = await requireBedrockAccess( + ctx, gateModelId, + async ( lookupArgs ) => ctx.runQuery( internal.users.getInternal, lookupArgs ), + ); + if ( !gateResult.allowed ) { + return { + success: false, + results: [], + totalLatency: 0, + }; + } + gateUserId = gateResult.userId; + } + + const startTime = Date.now(); + let totalInputTokens = 0; + let totalOutputTokens = 0; + + const resultPromises = args.prompts.map(async (promptConfig) => { + const promptStartTime = Date.now(); + + try { + // Render template + const renderedPrompt = renderTemplate(promptConfig.template, { + ...promptConfig.variables, + ...args.sharedContext, + }); + + // Execute + const modelResult = await invokeModel(promptConfig.model, renderedPrompt, ctx); + totalInputTokens += modelResult.inputTokens; + totalOutputTokens += modelResult.outputTokens; + + return { + promptId: promptConfig.id, + response: modelResult.text, + latency: Date.now() - promptStartTime, + }; + } catch (error: any) { + return { + promptId: promptConfig.id, + response: "", + latency: Date.now() - promptStartTime, + error: error.message, + }; + } + }); + + const results = await Promise.all(resultPromises); + + // Meter: token-based billing for all parallel prompts + if ( gateUserId && ( totalInputTokens > 0 || totalOutputTokens > 0 ) ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateUserId, + modelId: gateModelId, + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + } ); + } + + const totalLatency = Date.now() - startTime; + + const success = results.every((r) => !r.error); + + return { + success, + results, + totalLatency, + }; + }, +}); + +/** + * Render template with variables + */ +function renderTemplate(template: string, variables: any): string { + let rendered = template; + + // Replace {variable} placeholders + for (const [key, value] of Object.entries(variables)) { + const placeholder = `{${key}}`; + rendered = rendered.replace(new RegExp(placeholder, "g"), String(value)); + } + + return rendered; +} + +/** + * Extract value from response using JSONPath or regex + */ +function extractValue(response: string, extractor: string): any { + // Try JSON extraction + if (extractor.startsWith("$.") || extractor.startsWith("$[")) { + try { + const json = JSON.parse(response); + // Simple JSONPath implementation + const path = extractor.slice(2).split("."); + let value = json; + for (const key of path) { + value = value[key]; + } + return value; + } catch { + // Fall through to regex + } + } + + // Try regex extraction + try { + const regex = new RegExp(extractor); + const match = response.match(regex); + return match ? match[1] || match[0] : response; + } catch { + // Return full response if extraction fails + return response; + } +} + +/** + * Invoke model (Ollama or Bedrock) + */ +async function invokeModel( + modelSpec: string, + prompt: string, + _ctx: any, +): Promise<{ text: string; inputTokens: number; outputTokens: number }> { + const [provider] = modelSpec.split(":"); + + if (provider === "ollama") { + // Ollama model + const ollamaModel = modelSpec.substring("ollama:".length); + return await invokeOllama(ollamaModel, prompt); + } else if (provider === "bedrock") { + // Bedrock model + const bedrockModel = modelSpec.substring("bedrock:".length); + return await invokeBedrock(bedrockModel, prompt); + } else { + throw new Error(`Unknown provider: ${provider}`); + } +} + +/** + * Invoke Ollama model + */ +async function invokeOllama( + model: string, + prompt: string, +): Promise<{ text: string; inputTokens: number; outputTokens: number }> { + const ollamaHost = process.env.OLLAMA_ENDPOINT || "http://127.0.0.1:11434"; + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 30000); + + try { + const response = await fetch(`${ollamaHost}/api/generate`, { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model, + prompt, + stream: false, + }), + signal: controller.signal, + }); + + if (!response.ok) { + throw new Error(`Ollama request failed: ${response.status}`); + } + + const data = await response.json(); + return { text: data.response, inputTokens: 0, outputTokens: 0 }; + } finally { + clearTimeout(timeoutId); + } +} + +/** + * Invoke Bedrock model + */ +async function invokeBedrock( + model: string, + prompt: string, +): Promise<{ text: string; inputTokens: number; outputTokens: number }> { + const { BedrockRuntimeClient, InvokeModelCommand } = await import("@aws-sdk/client-bedrock-runtime"); + + const accessKeyId = process.env.AWS_ACCESS_KEY_ID; + const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY; + if ( ( accessKeyId && !secretAccessKey ) || ( secretAccessKey && !accessKeyId ) ) { + throw new Error( "AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY must both be set or both be unset" ); + } + + const client = new BedrockRuntimeClient({ + region: process.env.AWS_REGION || "us-east-1", + credentials: accessKeyId && secretAccessKey + ? { accessKeyId, secretAccessKey } + : undefined, + }); + + const command = new InvokeModelCommand({ + modelId: model, + contentType: "application/json", + accept: "application/json", + body: JSON.stringify({ + anthropic_version: "bedrock-2023-05-31", + max_tokens: 4096, + messages: [ + { + role: "user", + content: prompt, + }, + ], + }), + }); + + const response = await client.send(command); + const responseBody = JSON.parse(new TextDecoder().decode(response.body)); + const outputText = responseBody.content?.[0]?.text || ""; + + const { extractTokenUsage, estimateTokenUsage } = await import( "./lib/tokenBilling" ); + let tokenUsage = extractTokenUsage( responseBody, model ); + if ( tokenUsage.totalTokens === 0 ) { + tokenUsage = estimateTokenUsage( prompt, outputText ); + } + + return { + text: outputText, + inputTokens: tokenUsage.inputTokens, + outputTokens: tokenUsage.outputTokens, + }; +} + +/** + * Test a single prompt (for visual scripting tool testing) + */ +export const testPrompt = action({ + args: { + template: v.string(), + variables: v.optional(v.any()), + model: v.string(), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + prompt: string; + response: string; + latency: number; + error?: string; + }> => { + // Gate: enforce tier-based Bedrock access + let gateUserId: any = null; + let gateModelId: string | undefined; + if ( args.model.startsWith( "bedrock:" ) ) { + const { requireBedrockAccess } = await import( "./lib/bedrockGate" ); + gateModelId = args.model.substring( "bedrock:".length ); + const gateResult = await requireBedrockAccess( + ctx, gateModelId, + async ( lookupArgs ) => ctx.runQuery( internal.users.getInternal, lookupArgs ), + ); + if ( !gateResult.allowed ) { + return { success: false, prompt: args.template, response: "", latency: 0, error: gateResult.reason }; + } + gateUserId = gateResult.userId; + } + + const startTime = Date.now(); + + try { + // Render template + const renderedPrompt = renderTemplate(args.template, args.variables || {}); + + // Execute + const modelResult = await invokeModel(args.model, renderedPrompt, ctx); + + // Meter: token-based billing + if ( gateUserId && ( modelResult.inputTokens > 0 || modelResult.outputTokens > 0 ) ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateUserId, + modelId: gateModelId, + inputTokens: modelResult.inputTokens, + outputTokens: modelResult.outputTokens, + } ); + } + + return { + success: true, + prompt: renderedPrompt, + response: modelResult.text, + latency: Date.now() - startTime, + }; + } catch (error: any) { + return { + success: false, + prompt: renderTemplate(args.template, args.variables || {}), + response: "", + latency: Date.now() - startTime, + error: error.message || "Prompt execution failed", + }; + } + }, +}); + +/** + * Execute a tool from the Strands Agents Tools registry + */ +export const executeTool = action({ + args: { + toolName: v.string(), + toolType: v.string(), // "handoff_to_user", "short_term_memory", etc. + inputs: v.any(), + config: v.any(), + }, + handler: async (_ctx, args): Promise<{ + success: boolean; + outputs: any; + latency: number; + error?: string; + }> => { + const startTime = Date.now(); + + try { + let result: any; + + switch (args.toolType) { + case "handoff": + result = await executeHandoffToUser(args.inputs, args.config); + break; + + case "short_term": + result = await executeShortTermMemory(args.inputs, args.config); + break; + + case "long_term": + result = await executeLongTermMemory(args.inputs, args.config); + break; + + case "semantic": + result = await executeSemanticMemory(args.inputs, args.config); + break; + + case "self_consistency": + result = await executeSelfConsistency(args.inputs, args.config); + break; + + case "tree_of_thoughts": + result = await executeTreeOfThoughts(args.inputs, args.config); + break; + + case "reflexion": + result = await executeReflexion(args.inputs, args.config); + break; + + case "map_reduce": + result = await executeMapReduce(args.inputs, args.config); + break; + + default: + throw new Error(`Unknown tool type: ${args.toolType}`); + } + + return { + success: true, + outputs: result, + latency: Date.now() - startTime, + }; + } catch (error: any) { + return { + success: false, + outputs: null, + latency: Date.now() - startTime, + error: error.message || "Tool execution failed", + }; + } + }, +}); + +/** + * Tool Implementations + */ + +async function executeHandoffToUser(inputs: any, config: any): Promise { + // Store handoff request in database (would use a Convex mutation in production) + return { + status: "pending_user_input", + question: config.question, + options: config.options, + handoffId: `handoff_${Date.now()}`, + }; +} + +async function executeShortTermMemory(inputs: any, config: any): Promise { + // In-memory storage for short-term memory (would use Convex database in production) + const operation = inputs.operation; + + if (operation === "store") { + return { + result: "stored", + key: inputs.key, + value: inputs.value, + }; + } else if (operation === "retrieve") { + return { + result: inputs.value || null, + key: inputs.key, + }; + } + + return { result: "operation_complete" }; +} + +async function executeLongTermMemory(inputs: any, config: any): Promise { + // Persistent storage (would use Convex database + vector DB in production) + return { + result: "stored_in_long_term_memory", + version: 1, + }; +} + +async function executeSemanticMemory(inputs: any, config: any): Promise { + // Vector search (would use actual embedding model in production) + return { + results: [], + relevanceScores: [], + }; +} + +async function executeSelfConsistency(inputs: any, config: any): Promise { + // Multi-path voting (simplified implementation) + const numPaths = config.numPaths || 3; + const answers = []; + + for (let i = 0; i < numPaths; i++) { + // Would invoke model with different temperatures here + answers.push(`answer_${i}`); + } + + return { + finalAnswer: answers[0], // Majority vote + confidence: 0.8, + reasoningPaths: answers, + voteDistribution: { [answers[0]]: numPaths }, + }; +} + +async function executeTreeOfThoughts(inputs: any, config: any): Promise { + // Tree exploration (simplified implementation) + return { + bestPath: ["root", "branch1", "leaf1"], + confidence: 0.9, + treeStructure: {}, + }; +} + +async function executeReflexion(inputs: any, config: any): Promise { + // Self-improvement loop (simplified implementation) + return { + finalResult: "improved_solution", + iterationHistory: [], + improvements: ["fixed_logic_error", "added_evidence"], + }; +} + +async function executeMapReduce(inputs: any, config: any): Promise { + // Map-reduce aggregation (simplified implementation) + return { + result: "aggregated_result", + intermediateResults: [], + }; +} diff --git a/convex/queueProcessor.ts b/convex/queueProcessor.ts index d7e36d5..69c2fa8 100644 --- a/convex/queueProcessor.ts +++ b/convex/queueProcessor.ts @@ -14,7 +14,7 @@ const MAX_CONCURRENT_TESTS = parseInt(process.env.MAX_CONCURRENT_TESTS || "10"); /** * Main queue processor - triggered on-demand when tests are submitted * Also runs periodically as a backup (if enabled in crons.ts) - * + * * Cost optimization: Exits silently when queue is empty (no logs, minimal operations) */ export const processQueue = internalAction({ @@ -79,42 +79,30 @@ export const processQueue = internalAction({ continue; } - // Update test status to BUILDING - await ctx.runMutation(internal.testExecution.updateStatus, { - testId: test._id, - status: "BUILDING", - }); - - await ctx.runMutation(internal.testExecution.appendLogs, { - testId: test._id, - logs: [ - "📦 Test claimed from queue", - "🔨 Starting container build...", - ], - timestamp: Date.now(), - }); - - // SMART ROUTING: Bedrock → AgentCore, Ollama → Lambda + // SMART ROUTING: Bedrock → AgentCore (simplified), Ollama → Fargate const agent = await ctx.runQuery(internal.agents.getInternal, { id: test.agentId }); const modelId = agent?.model || test.modelConfig?.modelId || ''; - + // Check if Bedrock model (starts with provider prefix) - const isBedrockModel = modelId.startsWith('anthropic.') || - modelId.startsWith('amazon.') || + const isBedrockModel = modelId.startsWith('anthropic.') || + modelId.startsWith('amazon.') || modelId.startsWith('ai21.') || modelId.startsWith('cohere.') || modelId.startsWith('meta.') || modelId.startsWith('mistral.'); - + let result; if (isBedrockModel) { - // Route to AgentCore Sandbox (fast, serverless, Bedrock only) + // Route to AgentCore (simplified: Direct Bedrock → Lambda backup) await ctx.runMutation(internal.testExecution.appendLogs, { testId: test._id, - logs: ['🚀 Routing to AgentCore Sandbox (Bedrock model)'], + logs: [ + "📦 Test claimed from queue", + "🚀 Routing to AgentCore (Bedrock model - cost optimized)", + ], timestamp: Date.now(), }); - + result = await ctx.runAction(internal.agentcoreTestExecution.executeAgentCoreTest, { testId: test._id, agentId: test.agentId, @@ -125,7 +113,10 @@ export const processQueue = internalAction({ // Route to ECS Fargate (Docker support for Ollama) await ctx.runMutation(internal.testExecution.appendLogs, { testId: test._id, - logs: ['🚀 Routing to ECS Fargate (Ollama model)'], + logs: [ + "📦 Test claimed from queue", + "🚀 Routing to ECS Fargate (Ollama model)", + ], timestamp: Date.now(), }); @@ -146,20 +137,20 @@ export const processQueue = internalAction({ } if ("error" in result) { - // Failed to start container - console.error(`❌ Failed to start container for test ${test._id}:`, result.error); + // Failed to execute test + console.error(`❌ Failed to execute test ${test._id}:`, result.error); await ctx.runMutation(internal.testExecution.updateStatus, { testId: test._id, status: "FAILED", success: false, error: result.error, - errorStage: "build", + errorStage: "execution", }); await ctx.runMutation(internal.testExecution.appendLogs, { testId: test._id, - logs: [`❌ Container start failed: ${result.error}`], + logs: [`❌ Test execution failed: ${result.error}`], timestamp: Date.now(), }); @@ -179,15 +170,14 @@ export const processQueue = internalAction({ }); } } else { - // Successfully completed (Lambda returns immediately) - console.log(`✅ Lambda test completed for ${test._id}`); + // Successfully completed - execution methods handle their own status updates and usage tracking + const executionMethod = (result as any).executionMethod || 'unknown'; + console.log(`✅ Test ${test._id} completed successfully via ${executionMethod}`); - // Remove from queue + // Remove from queue (execution methods handle their own status updates and usage tracking) await ctx.runMutation(internal.queueProcessor.removeFromQueue, { queueId: queueEntry._id, }); - - // Test status already updated by Lambda execution } } catch (error: any) { console.error(`❌ Error processing test ${queueEntry.testId}:`, error); @@ -334,7 +324,7 @@ export const updateTestWithTaskInfo = internalMutation({ /** * Cleanup abandoned tests (scheduled to run every hour) - * + * * Cost optimization: Exits early if no tests in queue */ export const cleanupAbandonedTests = internalAction({ diff --git a/convex/rateLimiter.ts b/convex/rateLimiter.ts new file mode 100644 index 0000000..7d14ea8 --- /dev/null +++ b/convex/rateLimiter.ts @@ -0,0 +1,457 @@ +/** + * Rate Limiting Middleware for Convex Backend + * Implements sliding window rate limiting with Redis-like behavior + * Protects against abuse while allowing legitimate usage + */ + +import { action, internalMutation, internalQuery } from "./_generated/server"; +import { v } from "convex/values"; +import { internal } from "./_generated/api"; +import type { ActionCtx } from "./_generated/server"; + +interface RateLimitConfig { + windowMs: number; // Time window in milliseconds + maxRequests: number; // Maximum requests per window + blockDurationMs?: number; // How long to block after limit exceeded + burstAllowance?: number; // Allow burst requests +} + +interface RateLimitEntry { + userId: string; + action: string; + requests: number[]; + blockedUntil?: number; + lastRequest: number; +} + +// Maximum number of timestamp entries stored per rate-limit document. +// Prevents unbounded Convex document growth for high-traffic users. +const MAX_RATE_LIMIT_REQUESTS = 200; + +// Default rate limits by action type (used when no tier-specific config is provided) +export const RATE_LIMITS: Record = { + // Agent execution (most expensive) + "agentExecution": { + windowMs: 60 * 1000, // 1 minute + maxRequests: 10, // 10 executions per minute + blockDurationMs: 5 * 60 * 1000, // 5 minute block + }, + + // Agent testing (expensive) + "agentTesting": { + windowMs: 60 * 1000, // 1 minute + maxRequests: 20, // 20 tests per minute + blockDurationMs: 2 * 60 * 1000, // 2 minute block + }, + + // Swarm operations (very expensive) + "swarmExecution": { + windowMs: 5 * 60 * 1000, // 5 minutes + maxRequests: 3, // 3 swarm executions per 5 minutes + blockDurationMs: 15 * 60 * 1000, // 15 minute block + }, + + // Model operations + "modelOperations": { + windowMs: 30 * 1000, // 30 seconds + maxRequests: 50, // 50 operations per 30 seconds + blockDurationMs: 60 * 1000, // 1 minute block + }, + + // General API calls + "generalApi": { + windowMs: 10 * 1000, // 10 seconds + maxRequests: 100, // 100 calls per 10 seconds + blockDurationMs: 30 * 1000, // 30 second block + }, + + // File uploads/downloads + "fileOperations": { + windowMs: 60 * 1000, // 1 minute + maxRequests: 30, // 30 operations per minute + blockDurationMs: 2 * 60 * 1000, // 2 minute block + }, + + // Authentication operations + "authOperations": { + windowMs: 60 * 1000, // 1 minute + maxRequests: 10, // 10 auth operations per minute + blockDurationMs: 5 * 60 * 1000, // 5 minute block + }, +}; + +/** + * Build a tier-aware rate limit config for agent execution / testing. + * Callers pass maxConcurrentTests from tierConfig to avoid import coupling. + * The burst ceiling is maxConcurrentTests * 2 per minute. + * + * Usage: + * const tierCfg = getTierConfig(userTier); + * const rlCfg = buildTierRateLimitConfig(tierCfg.maxConcurrentTests, "agentExecution"); + * const result = await checkRateLimit(ctx, userId, "agentExecution", rlCfg); + */ +export function buildTierRateLimitConfig( + maxConcurrentTests: number, + actionType: "agentExecution" | "agentTesting" +): RateLimitConfig { + const maxPerMinute = maxConcurrentTests * 2; // e.g., freemium=2/min, personal=10/min, enterprise=40/min + const base = RATE_LIMITS[actionType]; + return { + windowMs: base.windowMs, + maxRequests: maxPerMinute, + blockDurationMs: base.blockDurationMs, + }; +} + +/** + * Inline rate limit check for mutations (direct db access). + * Mutations cannot use ctx.runQuery/ctx.runMutation, so this accesses + * the database directly. Actions should use checkRateLimit() instead. + */ +export async function checkRateLimitInMutation( + ctx: { db: any; scheduler: any }, + userId: string, + actionName: string, + config?: RateLimitConfig +): Promise<{ allowed: boolean; reason?: string }> { + const limitConfig = config || RATE_LIMITS[actionName] || RATE_LIMITS.generalApi; + + const entry = await ctx.db + .query("rateLimitEntries") + .withIndex("by_user_action", (q: any) => + q.eq("userId", userId).eq("action", actionName) + ) + .first(); + + const now = Date.now(); + + // Check if user is currently blocked + if (entry?.blockedUntil && now < entry.blockedUntil) { + return { allowed: false, reason: "Rate limited - try again later" }; + } + + // Sliding window check + const windowStart = now - limitConfig.windowMs; + const validRequests = entry?.requests?.filter((t: number) => t > windowStart) || []; + + if (validRequests.length >= limitConfig.maxRequests) { + const blockedUntil = now + (limitConfig.blockDurationMs || limitConfig.windowMs); + if (entry) { + await ctx.db.patch(entry._id, { blockedUntil, lastRequest: now }); + } else { + await ctx.db.insert("rateLimitEntries", { + userId, action: actionName, requests: validRequests, blockedUntil, lastRequest: now, + }); + } + return { + allowed: false, + reason: `Rate limit: ${limitConfig.maxRequests} per ${limitConfig.windowMs / 1000}s`, + }; + } + + // Record the request, trimming to MAX_RATE_LIMIT_REQUESTS + const newRequests = [...validRequests, now].slice(-MAX_RATE_LIMIT_REQUESTS); + if (entry) { + await ctx.db.patch(entry._id, { requests: newRequests, lastRequest: now }); + } else { + await ctx.db.insert("rateLimitEntries", { + userId, action: actionName, requests: newRequests, lastRequest: now, + }); + } + + return { allowed: true }; +} + +/** + * Check if request is within rate limits (for actions - uses ctx.runQuery/ctx.runMutation) + */ +export async function checkRateLimit( + ctx: ActionCtx, + userId: string, + action: string, + config?: RateLimitConfig +): Promise<{ + allowed: boolean; + remainingRequests: number; + resetTime: number; + blockedUntil?: number; + reason?: string; +}> { + const limitConfig = config || RATE_LIMITS[action] || RATE_LIMITS.generalApi; + + // Get current rate limit entry + const entry = await ctx.runQuery(internal.rateLimiter.getRateLimitEntry, { + userId, + action, + }); + + const now = Date.now(); + + // Check if user is currently blocked + if (entry?.blockedUntil && now < entry.blockedUntil) { + return { + allowed: false, + remainingRequests: 0, + resetTime: entry.blockedUntil, + blockedUntil: entry.blockedUntil, + reason: "Rate limit exceeded - temporarily blocked", + }; + } + + // Clean old requests outside the window + const windowStart = now - limitConfig.windowMs; + const validRequests = entry?.requests.filter((time: number) => time > windowStart) || []; + + // Check if within limits + if (validRequests.length >= limitConfig.maxRequests) { + // Block the user + const blockedUntil = now + (limitConfig.blockDurationMs || limitConfig.windowMs); + + await ctx.runMutation(internal.rateLimiter.updateRateLimitEntry, { + userId, + action, + requests: validRequests, + blockedUntil, + lastRequest: now, + }); + + return { + allowed: false, + remainingRequests: 0, + resetTime: blockedUntil, + blockedUntil, + reason: `Rate limit exceeded: ${limitConfig.maxRequests} requests per ${limitConfig.windowMs / 1000}s`, + }; + } + + // Update the entry with new request, trimming to MAX_RATE_LIMIT_REQUESTS + // to prevent unbounded document growth in Convex. + const newRequests = [...validRequests, now].slice(-MAX_RATE_LIMIT_REQUESTS); + await ctx.runMutation(internal.rateLimiter.updateRateLimitEntry, { + userId, + action, + requests: newRequests, + lastRequest: now, + }); + + const remainingRequests = limitConfig.maxRequests - newRequests.length; + const resetTime = now + limitConfig.windowMs; + + return { + allowed: true, + remainingRequests, + resetTime, + }; +} + +/** + * Rate limiting middleware wrapper for actions + */ +export function withRateLimit( + actionName: string, + config?: RateLimitConfig +) { + return (fn: (ctx: ActionCtx, ...args: T) => Promise) => { + return async (ctx: ActionCtx, ...args: T): Promise => { + // Get user ID from Convex auth + const identity = await ctx.auth.getUserIdentity(); + const userId = identity?.subject; + + if (!userId) { + throw new Error("Authentication required"); + } + + // Check rate limit + const rateLimitResult = await checkRateLimit(ctx, String(userId), actionName, config); + + if (!rateLimitResult.allowed) { + throw new Error(`Rate limit exceeded: ${rateLimitResult.reason}`); + } + + // Execute the original function + return fn(ctx, ...args); + }; + }; +} + +/** + * Get rate limit entry for user and action + */ +export const getRateLimitEntry = internalQuery({ + args: { + userId: v.string(), + action: v.string(), + }, + handler: async (ctx, args): Promise => { + const entry = await ctx.db + .query("rateLimitEntries") + .withIndex("by_user_action", (q) => + q.eq("userId", args.userId).eq("action", args.action) + ) + .first(); + + return entry as RateLimitEntry | null; + }, +}); + +/** + * Update or create rate limit entry + */ +export const updateRateLimitEntry = internalMutation({ + args: { + userId: v.string(), + action: v.string(), + requests: v.array(v.number()), + blockedUntil: v.optional(v.number()), + lastRequest: v.number(), + }, + handler: async (ctx, args) => { + const existing = await ctx.db + .query("rateLimitEntries") + .withIndex("by_user_action", (q) => + q.eq("userId", args.userId).eq("action", args.action) + ) + .first(); + + if (existing) { + await ctx.db.patch(existing._id, { + requests: args.requests, + blockedUntil: args.blockedUntil, + lastRequest: args.lastRequest, + }); + } else { + await ctx.db.insert("rateLimitEntries", { + userId: args.userId, + action: args.action, + requests: args.requests, + blockedUntil: args.blockedUntil, + lastRequest: args.lastRequest, + }); + } + }, +}); + +/** + * Clean up old rate limit entries (run periodically) + */ +export const cleanupOldEntries = internalMutation({ + handler: async (ctx) => { + const cutoffTime = Date.now() - (24 * 60 * 60 * 1000); // 24 hours ago + + const oldEntries = await ctx.db + .query("rateLimitEntries") + .filter((q) => q.lt(q.field("lastRequest"), cutoffTime)) + .collect(); + + for (const entry of oldEntries) { + await ctx.db.delete(entry._id); + } + + return { cleaned: oldEntries.length }; + }, +}); + +/** + * Get rate limit status for user + */ +export const getRateLimitStatus = internalQuery({ + args: { + userId: v.string(), + action: v.string(), + }, + handler: async (ctx, args): Promise<{ + currentRequests: number; + limit: number; + windowMs: number; + blockedUntil?: number; + resetTime: number; + } | null> => { + // Direct db access instead of ctx.runQuery (cannot nest queries inside internalQuery) + const entry = await ctx.db + .query("rateLimitEntries") + .withIndex("by_user_action", (q) => + q.eq("userId", args.userId).eq("action", args.action) + ) + .first(); + + const config = RATE_LIMITS[args.action] || RATE_LIMITS.generalApi; + + if (!entry) { + return { + currentRequests: 0, + limit: config.maxRequests, + windowMs: config.windowMs, + resetTime: Date.now() + config.windowMs, + }; + } + + const now = Date.now(); + const windowStart = now - config.windowMs; + const validRequests = entry.requests.filter((time: number) => time > windowStart); + + return { + currentRequests: validRequests.length, + limit: config.maxRequests, + windowMs: config.windowMs, + blockedUntil: entry.blockedUntil, + resetTime: now + config.windowMs, + }; + }, +}); + +/** + * Reset rate limits for a user (admin function) + */ +export const resetUserRateLimits = internalMutation({ + args: { + userId: v.string(), + action: v.optional(v.string()), // If not provided, reset all actions + }, + handler: async (ctx, args) => { + if (args.action) { + // Reset specific action + const entry = await ctx.db + .query("rateLimitEntries") + .withIndex("by_user_action", (q) => + q.eq("userId", args.userId).eq("action", args.action!) + ) + .first(); + + if (entry) { + await ctx.db.delete(entry._id); + } + } else { + // Reset all actions for user + const entries = await ctx.db + .query("rateLimitEntries") + .withIndex("by_user", (q) => q.eq("userId", args.userId)) + .collect(); + + for (const entry of entries) { + await ctx.db.delete(entry._id); + } + } + }, +}); + +/** + * Get rate limit statistics (admin only) + */ +export const getRateLimitStats = internalQuery({ + handler: async (ctx) => { + const totalEntries = await ctx.db.query("rateLimitEntries").collect(); + const blockedUsers = totalEntries.filter(entry => entry.blockedUntil && entry.blockedUntil > Date.now()); + + // Group by action + const byAction: Record = {}; + for (const entry of totalEntries) { + byAction[entry.action] = (byAction[entry.action] || 0) + 1; + } + + return { + totalEntries: totalEntries.length, + blockedUsers: blockedUsers.length, + actions: byAction, + }; + }, +}); diff --git a/convex/realExecution.test.ts b/convex/realExecution.test.ts index ca9bc2c..837859b 100644 --- a/convex/realExecution.test.ts +++ b/convex/realExecution.test.ts @@ -28,11 +28,10 @@ describe("REAL Agent Execution (Integration Tests)", () => { // Create test user testUserId = await t.run(async (ctx: any) => { return await ctx.db.insert("users", { - userId: "test-user-real-execution", email: "real@test.com", name: "Real Test User", tier: "personal", - testsThisMonth: 0, + executionsThisMonth: 0, createdAt: Date.now(), isAnonymous: false, }); diff --git a/convex/schema.ts b/convex/schema.ts index 8c8ab49..004f288 100644 --- a/convex/schema.ts +++ b/convex/schema.ts @@ -4,258 +4,339 @@ import { authTables } from "@convex-dev/auth/server"; const applicationTables = { // User AWS Accounts for Tier 2 (Cross-Account Deployment) - userAWSAccounts: defineTable({ - userId: v.id("users"), + userAWSAccounts: defineTable( { + userId: v.id( "users" ), externalId: v.string(), // Unique security token - roleArn: v.optional(v.string()), // User's cross-account role ARN - region: v.optional(v.string()), - awsAccountId: v.optional(v.string()), + roleArn: v.optional( v.string() ), // User's cross-account role ARN + region: v.optional( v.string() ), + awsAccountId: v.optional( v.string() ), status: v.string(), // "pending", "connected", "disconnected" createdAt: v.number(), - connectedAt: v.optional(v.number()), - disconnectedAt: v.optional(v.number()), - }) - .index("by_user_id", ["userId"]) - .index("by_user_and_external_id", ["userId", "externalId"]) - .index("by_external_id", ["externalId"]), + connectedAt: v.optional( v.number() ), + disconnectedAt: v.optional( v.number() ), + } ) + .index( "by_user_id", ["userId"] ) + .index( "by_user_and_external_id", ["userId", "externalId"] ) + .index( "by_external_id", ["externalId"] ), // Deployment History (Merged: Simple + AWS Deployments) - deployments: defineTable({ + deployments: defineTable( { // Identity - agentId: v.id("agents"), - userId: v.id("users"), + agentId: v.id( "agents" ), + userId: v.id( "users" ), // Tier & Account Info tier: v.string(), // "freemium", "personal", "enterprise" - awsAccountId: v.optional(v.string()), + awsAccountId: v.optional( v.string() ), region: v.string(), - environment: v.optional(v.string()), // dev | staging | prod + environment: v.optional( v.string() ), // dev | staging | prod // Deployment Configuration - agentName: v.optional(v.string()), - description: v.optional(v.string()), + agentName: v.optional( v.string() ), + description: v.optional( v.string() ), // AWS Resources - taskArn: v.optional(v.string()), // ECS task ARN - agentCoreRuntimeId: v.optional(v.string()), - agentCoreEndpoint: v.optional(v.string()), - cloudFormationStackId: v.optional(v.string()), - ecrRepositoryUri: v.optional(v.string()), - s3BucketName: v.optional(v.string()), - deploymentPackageKey: v.optional(v.string()), - awsCallerArn: v.optional(v.string()), + taskArn: v.optional( v.string() ), // ECS task ARN + agentCoreRuntimeId: v.optional( v.string() ), + agentCoreEndpoint: v.optional( v.string() ), + cloudFormationStackId: v.optional( v.string() ), + ecrRepositoryUri: v.optional( v.string() ), + s3BucketName: v.optional( v.string() ), + deploymentPackageKey: v.optional( v.string() ), + awsCallerArn: v.optional( v.string() ), // Status & Progress status: v.string(), // "running", "completed", "failed", "CREATING", "ACTIVE", etc. - progress: v.optional(v.object({ + progress: v.optional( v.object( { stage: v.string(), percentage: v.number(), message: v.string(), - currentStep: v.optional(v.string()), - totalSteps: v.optional(v.number()), - })), + currentStep: v.optional( v.string() ), + totalSteps: v.optional( v.number() ), + } ) ), // Configuration - enableMonitoring: v.optional(v.boolean()), - enableAutoScaling: v.optional(v.boolean()), - enableXRay: v.optional(v.boolean()), - logRetentionDays: v.optional(v.number()), + enableMonitoring: v.optional( v.boolean() ), + enableAutoScaling: v.optional( v.boolean() ), + enableXRay: v.optional( v.boolean() ), + logRetentionDays: v.optional( v.number() ), // Logs & Errors - error: v.optional(v.string()), - logs: v.optional(v.union( + error: v.optional( v.string() ), + logs: v.optional( v.union( v.string(), // Simple string logs - v.array(v.object({ // Structured logs + v.array( v.object( { // Structured logs timestamp: v.number(), level: v.string(), message: v.string(), - source: v.optional(v.string()), - })) - )), + source: v.optional( v.string() ), + } ) ) + ) ), // Timestamps startedAt: v.number(), - completedAt: v.optional(v.number()), - createdAt: v.optional(v.number()), - updatedAt: v.optional(v.number()), - deployedAt: v.optional(v.number()), - deletedAt: v.optional(v.number()), + completedAt: v.optional( v.number() ), + createdAt: v.optional( v.number() ), + updatedAt: v.optional( v.number() ), + deployedAt: v.optional( v.number() ), + deletedAt: v.optional( v.number() ), // Metadata - version: v.optional(v.string()), - isActive: v.optional(v.boolean()), - lastHealthCheck: v.optional(v.number()), - healthStatus: v.optional(v.string()), - }) - .index("by_agent", ["agentId"]) - .index("by_user", ["userId"]) - .index("by_tier", ["tier"]) - .index("by_status", ["status"]) - .index("by_active", ["isActive"]), + version: v.optional( v.string() ), + isActive: v.optional( v.boolean() ), + lastHealthCheck: v.optional( v.number() ), + healthStatus: v.optional( v.string() ), + } ) + .index( "by_agent", ["agentId"] ) + .index( "by_user", ["userId"] ) + .index( "by_tier", ["tier"] ) + .index( "by_status", ["status"] ) + .index( "by_active", ["isActive"] ), // User Profiles with Tier Information - users: defineTable({ + users: defineTable( { + role: v.optional( v.union( + v.literal( "admin" ), + v.literal( "user" ), + v.literal( "paid" ), + v.literal( "guest" ), + v.literal( "enterprise" ) + ) ), // DO NOT add userId field - use _id (Convex user document ID) instead - email: v.optional(v.string()), - name: v.optional(v.string()), - image: v.optional(v.string()), // Profile picture URL - tier: v.optional(v.string()), // "freemium", "personal", "enterprise" - testsThisMonth: v.optional(v.number()), // For freemium limits - upgradedAt: v.optional(v.number()), - createdAt: v.optional(v.number()), - isAnonymous: v.optional(v.boolean()), // For anonymous users + email: v.optional( v.string() ), + name: v.optional( v.string() ), + image: v.optional( v.string() ), // Profile picture URL + emailVerificationTime: v.optional( v.number() ), // When email was verified (OAuth providers) + tier: v.optional( v.string() ), // "freemium", "personal", "enterprise" + testsThisMonth: v.optional( v.number() ), // For freemium limits + upgradedAt: v.optional( v.number() ), + createdAt: v.optional( v.number() ), + isAnonymous: v.optional( v.boolean() ), // For anonymous users + deviceId: v.optional( v.string() ), // Browser fingerprint for anonymous users + mergedInto: v.optional( v.id( "users" ) ), // If anonymous account was merged + mergedAt: v.optional( v.number() ), // When account was merged // OAuth provider-specific fields - locale: v.optional(v.string()), // Google: user's locale (e.g., "en-US") - login: v.optional(v.string()), // GitHub: username - authProvider: v.optional(v.string()), // "github" | "google" | "cognito" | "password" + locale: v.optional( v.string() ), // Google: user's locale (e.g., "en-US") + login: v.optional( v.string() ), // GitHub: username + authProvider: v.optional( v.string() ), // "github" | "google" | "cognito" | "password" // Auth metadata - lastSignIn: v.optional(v.number()), - signInCount: v.optional(v.number()), - + lastSignIn: v.optional( v.number() ), + signInCount: v.optional( v.number() ), + + // Usage Tracking + lastTestAt: v.optional( v.number() ), // Last test execution timestamp + totalTokensUsed: v.optional( v.number() ), // Total tokens consumed across all tests + totalExecutionTime: v.optional( v.number() ), // Total execution time in milliseconds + // AWS Deployment Credentials - awsAuthMethod: v.optional(v.union(v.literal("assumeRole"), v.literal("direct"))), - awsRoleArn: v.optional(v.string()), - awsAccessKeyId: v.optional(v.string()), - awsSecretAccessKey: v.optional(v.string()), - awsConfiguredAt: v.optional(v.number()), + awsAuthMethod: v.optional( v.union( v.literal( "assumeRole" ), v.literal( "direct" ) ) ), + awsRoleArn: v.optional( v.string() ), + awsAccessKeyId: v.optional( v.string() ), + awsSecretAccessKey: v.optional( v.string() ), + awsConfiguredAt: v.optional( v.number() ), // AWS Federated Identity (for Cognito users) - awsIdentityId: v.optional(v.string()), // Cognito Identity Pool ID - awsCredentials: v.optional(v.object({ + awsIdentityId: v.optional( v.string() ), // Cognito Identity Pool ID + awsCredentials: v.optional( v.object( { accessKeyId: v.string(), secretKey: v.string(), sessionToken: v.string(), expiration: v.number(), - })), - awsCredentialsUpdatedAt: v.optional(v.number()), - }) - .index("by_tier", ["tier"]) - .index("by_email", ["email"]) - .index("by_auth_provider", ["authProvider"]), + } ) ), + awsCredentialsUpdatedAt: v.optional( v.number() ), + + // Stripe Billing + stripeCustomerId: v.optional( v.string() ), + stripeSubscriptionId: v.optional( v.string() ), + subscriptionStatus: v.optional( v.string() ), // "active", "past_due", "canceled" + currentPeriodEnd: v.optional( v.number() ), // Unix timestamp + executionsThisMonth: v.optional( v.number() ), // Weighted units consumed in current billing period (token-based: 2x AWS cost at $0.05/unit) + rawCallsThisMonth: v.optional( v.number() ), // Unweighted call count for analytics + tokensInputThisMonth: v.optional( v.number() ), // Total input tokens consumed this billing period + tokensOutputThisMonth: v.optional( v.number() ), // Total output tokens consumed this billing period + billingPeriodStart: v.optional( v.number() ), // Start of current billing period + } ) + .index( "by_tier", ["tier"] ) + .index( "by_email", ["email"] ) + .index( "by_auth_provider", ["authProvider"] ) + .index( "by_device_id", ["deviceId"] ) + .index( "by_stripe_customer_id", ["stripeCustomerId"] ), // API Keys for external access and usage tracking - apiKeys: defineTable({ - userId: v.id("users"), + apiKeys: defineTable( { + userId: v.id( "users" ), name: v.string(), - description: v.optional(v.string()), + description: v.optional( v.string() ), keyHash: v.string(), keyPrefix: v.string(), isActive: v.boolean(), testsUsed: v.number(), - lastUsed: v.optional(v.number()), + lastUsed: v.optional( v.number() ), createdAt: v.number(), - revokedAt: v.optional(v.number()), - }) - .index("by_user", ["userId"]) - .index("by_hash", ["keyHash"]), + revokedAt: v.optional( v.number() ), + } ) + .index( "by_user", ["userId"] ) + .index( "by_hash", ["keyHash"] ), - agents: defineTable({ + agents: defineTable( { name: v.string(), - description: v.optional(v.string()), + description: v.optional( v.string() ), model: v.string(), - modelProvider: v.optional(v.string()), // "bedrock", "ollama", "openai", etc. + modelProvider: v.optional( v.string() ), // "bedrock", "ollama", "openai", etc. + ollamaEndpoint: v.optional( v.string() ), // e.g., "http://localhost:11434" systemPrompt: v.string(), - tools: v.array(v.object({ + tools: v.array( v.object( { name: v.string(), type: v.string(), - config: v.optional(v.object({ - description: v.optional(v.string()), - parameters: v.optional(v.array(v.object({ + config: v.optional( v.object( { + description: v.optional( v.string() ), + parameters: v.optional( v.array( v.object( { name: v.string(), type: v.string(), - description: v.optional(v.string()), - required: v.optional(v.boolean()), - }))), - })), - requiresPip: v.optional(v.boolean()), - pipPackages: v.optional(v.array(v.string())), - extrasPip: v.optional(v.string()), - notSupportedOn: v.optional(v.array(v.string())), - })), + description: v.optional( v.string() ), + required: v.optional( v.boolean() ), + } ) ) ), + } ) ), + requiresPip: v.optional( v.boolean() ), + pipPackages: v.optional( v.array( v.string() ) ), + extrasPip: v.optional( v.string() ), + notSupportedOn: v.optional( v.array( v.string() ) ), + } ) ), generatedCode: v.string(), - dockerConfig: v.optional(v.string()), + dockerConfig: v.optional( v.string() ), deploymentType: v.string(), // "aws", "ollama", "docker", "agentcore" - createdBy: v.id("users"), - isPublic: v.optional(v.boolean()), - tier: v.optional(v.string()), // "freemium", "personal", "enterprise" + sourceWorkflowId: v.optional( v.id( "workflows" ) ), + createdBy: v.id( "users" ), + isPublic: v.optional( v.boolean() ), + tier: v.optional( v.string() ), // "freemium", "personal", "enterprise" // MCP Configuration - mcpServers: v.optional(v.array(v.object({ + mcpServers: v.optional( v.array( v.object( { name: v.string(), command: v.string(), - args: v.array(v.string()), - env: v.optional(v.any()), - disabled: v.optional(v.boolean()), - }))), + args: v.array( v.string() ), + env: v.optional( v.any() ), + disabled: v.optional( v.boolean() ), + } ) ) ), // Dynamic Tools (Meta-tooling) - dynamicTools: v.optional(v.array(v.object({ + dynamicTools: v.optional( v.array( v.object( { name: v.string(), code: v.string(), parameters: v.any(), - }))), + } ) ) ), // MCP Tool Exposure - exposableAsMCPTool: v.optional(v.boolean()), - mcpToolName: v.optional(v.string()), - mcpInputSchema: v.optional(v.any()), + exposableAsMCPTool: v.optional( v.boolean() ), + mcpToolName: v.optional( v.string() ), + mcpInputSchema: v.optional( v.any() ), + + // Dynamic Model Switching (Unified Modality Switching) + enableDynamicModelSwitching: v.optional( v.boolean() ), + modelSwitchingConfig: v.optional( v.object( { + preferCost: v.optional( v.boolean() ), + preferSpeed: v.optional( v.boolean() ), + preferCapability: v.optional( v.boolean() ), + minComplexityForSonnet: v.optional( v.number() ), + minComplexityForOpus: v.optional( v.number() ), + } ) ), // Architecture & Deployment Metadata - diagramUrl: v.optional(v.string()), - lastDeployedAt: v.optional(v.number()), - deploymentCount: v.optional(v.number()), + diagramUrl: v.optional( v.string() ), + lastDeployedAt: v.optional( v.number() ), + deploymentCount: v.optional( v.number() ), // Timestamps - createdAt: v.optional(v.number()), - updatedAt: v.optional(v.number()), - }).index("by_user", ["createdBy"]) - .index("by_public", ["isPublic"]) - .index("by_mcp_tool_name", ["mcpToolName"]) - .index("by_user_and_tier", ["createdBy", "tier"]) - .index("by_deployment_type", ["deploymentType"]) - .index("by_model_provider", ["modelProvider"]), - - templates: defineTable({ + createdAt: v.optional( v.number() ), + updatedAt: v.optional( v.number() ), + } ).index( "by_user", ["createdBy"] ) + .index( "by_public", ["isPublic"] ) + .index( "by_mcp_tool_name", ["mcpToolName"] ) + .index( "by_user_and_tier", ["createdBy", "tier"] ) + .index( "by_deployment_type", ["deploymentType"] ) + .index( "by_model_provider", ["modelProvider"] ) + .index( "by_source_workflow", ["sourceWorkflowId"] ), + + templates: defineTable( { name: v.string(), description: v.string(), category: v.string(), model: v.string(), systemPrompt: v.string(), - tools: v.array(v.object({ + tools: v.array( v.object( { name: v.string(), type: v.string(), - config: v.optional(v.any()), - requiresPip: v.optional(v.boolean()), - pipPackages: v.optional(v.array(v.string())), - })), - isOfficial: v.optional(v.boolean()), - }).index("by_category", ["category"]), + config: v.optional( v.any() ), + requiresPip: v.optional( v.boolean() ), + pipPackages: v.optional( v.array( v.string() ) ), + } ) ), + isOfficial: v.optional( v.boolean() ), + } ).index( "by_category", ["category"] ), // Conversation Management - conversations: defineTable({ - agentId: v.id("agents"), - userId: v.id("users"), + conversations: defineTable( { + agentId: v.id( "agents" ), + userId: v.id( "users" ), title: v.string(), - messages: v.array(v.object({ - role: v.union(v.literal("user"), v.literal("assistant"), v.literal("system")), + // DEPRECATED: Embedded messages array can exceed Convex document size limit. + // New code should write to the conversationMessages table instead. + // Kept for backward compatibility; existing consumers still read this field. + messages: v.array( v.object( { + role: v.union( v.literal( "user" ), v.literal( "assistant" ), v.literal( "system" ) ), content: v.string(), timestamp: v.number(), - metadata: v.optional(v.any()), - })), + metadata: v.optional( v.any() ), + } ) ), createdAt: v.number(), updatedAt: v.number(), - }) - .index("by_user", ["userId"]) - .index("by_agent", ["agentId"]) - .index("by_agent_user", ["agentId", "userId"]), + } ) + .index( "by_user", ["userId"] ) + .index( "by_agent", ["agentId"] ) + .index( "by_agent_user", ["agentId", "userId"] ), + + // Normalized conversation messages (one document per message, mirrors interleavedMessages pattern). + // Replaces the unbounded embedded conversations.messages array. + conversationMessages: defineTable( { + conversationId: v.id( "conversations" ), + role: v.union( v.literal( "user" ), v.literal( "assistant" ), v.literal( "system" ) ), + content: v.string(), + timestamp: v.number(), + metadata: v.optional( v.any() ), + sequenceNumber: v.number(), // For deterministic ordering + } ) + .index( "by_conversation", ["conversationId", "sequenceNumber"] ) + .index( "by_conversation_ts", ["conversationId", "timestamp"] ), + + // Conversation Analysis for Agent Improvement + conversationAnalyses: defineTable( { + conversationId: v.id( "conversations" ), + agentId: v.id( "agents" ), + analysis: v.any(), // Detailed analysis object from conversationAnalysis.ts + createdAt: v.number(), + } ) + .index( "by_conversation", ["conversationId"] ) + .index( "by_agent", ["agentId"] ) + .index( "by_created", ["createdAt"] ), + + // Agent Improvement History + agentImprovementHistory: defineTable( { + agentId: v.id( "agents" ), + conversationId: v.id( "conversations" ), + improvementPlan: v.any(), + changes: v.array( v.string() ), // List of changes applied + appliedAt: v.number(), + } ) + .index( "by_agent", ["agentId"] ) + .index( "by_conversation", ["conversationId"] ) + .index( "by_applied", ["appliedAt"] ), // Containerized Agent Testing System - testExecutions: defineTable({ + testExecutions: defineTable( { // Identity - agentId: v.id("agents"), - userId: v.id("users"), + agentId: v.id( "agents" ), + userId: v.id( "users" ), // Test Configuration testQuery: v.string(), @@ -263,76 +344,76 @@ const applicationTables = { requirements: v.string(), dockerfile: v.string(), modelProvider: v.string(), // "ollama" | "bedrock" - modelConfig: v.object({ - baseUrl: v.optional(v.string()), - modelId: v.optional(v.string()), - region: v.optional(v.string()), - testEnvironment: v.optional(v.string()), // "docker" | "agentcore" | "fargate" - }), + modelConfig: v.object( { + baseUrl: v.optional( v.string() ), + modelId: v.optional( v.string() ), + region: v.optional( v.string() ), + testEnvironment: v.optional( v.string() ), // "docker" | "agentcore" | "fargate" + } ), timeout: v.number(), - agentRuntimeArn: v.optional(v.string()), // For AgentCore testing - conversationId: v.optional(v.id("conversations")), // For conversation context + agentRuntimeArn: v.optional( v.string() ), // For AgentCore testing + conversationId: v.optional( v.id( "conversations" ) ), // For conversation context // Execution State status: v.string(), // CREATED | QUEUED | BUILDING | RUNNING | COMPLETED | FAILED | ABANDONED | ARCHIVED phase: v.string(), // queued | building | running | completed // Infrastructure - ecsTaskArn: v.optional(v.string()), - ecsTaskId: v.optional(v.string()), - cloudwatchLogGroup: v.optional(v.string()), - cloudwatchLogStream: v.optional(v.string()), + ecsTaskArn: v.optional( v.string() ), + ecsTaskId: v.optional( v.string() ), + cloudwatchLogGroup: v.optional( v.string() ), + cloudwatchLogStream: v.optional( v.string() ), // Execution Logs - logs: v.array(v.string()), - lastLogFetchedAt: v.optional(v.number()), + logs: v.array( v.string() ), + lastLogFetchedAt: v.optional( v.number() ), // Results - success: v.optional(v.boolean()), - response: v.optional(v.string()), - error: v.optional(v.string()), - errorStage: v.optional(v.string()), + success: v.optional( v.boolean() ), + response: v.optional( v.string() ), + error: v.optional( v.string() ), + errorStage: v.optional( v.string() ), // Metrics submittedAt: v.number(), - startedAt: v.optional(v.number()), - completedAt: v.optional(v.number()), - executionTime: v.optional(v.number()), - buildTime: v.optional(v.number()), - queueWaitTime: v.optional(v.number()), - memoryUsed: v.optional(v.number()), - cpuUsed: v.optional(v.number()), + startedAt: v.optional( v.number() ), + completedAt: v.optional( v.number() ), + executionTime: v.optional( v.number() ), + buildTime: v.optional( v.number() ), + queueWaitTime: v.optional( v.number() ), + memoryUsed: v.optional( v.number() ), + cpuUsed: v.optional( v.number() ), // Deployment Package - deploymentPackageUrl: v.optional(v.string()), - deploymentPackageExpiry: v.optional(v.number()), - }) - .index("by_user", ["userId", "submittedAt"]) - .index("by_agent", ["agentId", "submittedAt"]) - .index("by_status", ["status", "submittedAt"]), - - testQueue: defineTable({ - testId: v.id("testExecutions"), + deploymentPackageUrl: v.optional( v.string() ), + deploymentPackageExpiry: v.optional( v.number() ), + } ) + .index( "by_user", ["userId", "submittedAt"] ) + .index( "by_agent", ["agentId", "submittedAt"] ) + .index( "by_status", ["status", "submittedAt"] ), + + testQueue: defineTable( { + testId: v.id( "testExecutions" ), priority: v.number(), // 1 = high, 2 = normal, 3 = low status: v.string(), // pending | claimed | abandoned - testType: v.optional(v.string()), // docker | agentcore + testType: v.optional( v.string() ), // docker | agentcore // Timestamps createdAt: v.number(), - claimedAt: v.optional(v.number()), - claimedBy: v.optional(v.string()), + claimedAt: v.optional( v.number() ), + claimedBy: v.optional( v.string() ), // Retry Tracking attempts: v.number(), - lastError: v.optional(v.string()), - }) - .index("by_status_priority", ["status", "priority", "createdAt"]) - .index("by_test", ["testId"]), + lastError: v.optional( v.string() ), + } ) + .index( "by_status_priority", ["status", "priority", "createdAt"] ) + .index( "by_test", ["testId"] ), - deploymentPackages: defineTable({ - testId: v.id("testExecutions"), - agentId: v.id("agents"), - userId: v.id("users"), + deploymentPackages: defineTable( { + testId: v.id( "testExecutions" ), + agentId: v.id( "agents" ), + userId: v.id( "users" ), // Package Metadata packageName: v.string(), @@ -345,60 +426,60 @@ const applicationTables = { urlExpiresAt: v.number(), // Contents Manifest - files: v.array(v.object({ + files: v.array( v.object( { path: v.string(), size: v.number(), checksum: v.string(), - })), + } ) ), // Download Tracking generatedAt: v.number(), downloadCount: v.number(), - lastDownloadedAt: v.optional(v.number()), - }) - .index("by_test", ["testId"]) - .index("by_user", ["userId", "generatedAt"]) - .index("by_expiry", ["urlExpiresAt"]), + lastDownloadedAt: v.optional( v.number() ), + } ) + .index( "by_test", ["testId"] ) + .index( "by_user", ["userId", "generatedAt"] ) + .index( "by_expiry", ["urlExpiresAt"] ), // MCP Server Configuration - mcpServers: defineTable({ + mcpServers: defineTable( { // Identity name: v.string(), - userId: v.id("users"), + userId: v.id( "users" ), // Server Configuration command: v.string(), - args: v.array(v.string()), - env: v.optional(v.object({})), + args: v.array( v.string() ), + env: v.optional( v.any() ), // Accepts arbitrary env maps (e.g. { "PATH": "/usr/bin" }) disabled: v.boolean(), - timeout: v.optional(v.number()), // Timeout in milliseconds + timeout: v.optional( v.number() ), // Timeout in milliseconds // Connection Status status: v.string(), // "connected" | "disconnected" | "error" | "unknown" - lastConnected: v.optional(v.number()), - lastError: v.optional(v.string()), + lastConnected: v.optional( v.number() ), + lastError: v.optional( v.string() ), // Tool Discovery - availableTools: v.optional(v.array(v.object({ + availableTools: v.optional( v.array( v.object( { name: v.string(), - description: v.optional(v.string()), - inputSchema: v.optional(v.any()), - }))), + description: v.optional( v.string() ), + inputSchema: v.optional( v.any() ), + } ) ) ), // Timestamps createdAt: v.number(), updatedAt: v.number(), - }) - .index("by_user", ["userId"]) - .index("by_name", ["name"]) - .index("by_status", ["status"]) - .index("by_user_and_name", ["userId", "name"]), + } ) + .index( "by_user", ["userId"] ) + .index( "by_name", ["name"] ) + .index( "by_status", ["status"] ) + .index( "by_user_and_name", ["userId", "name"] ), // Architecture Diagrams - diagrams: defineTable({ + diagrams: defineTable( { // Identity - deploymentId: v.id("deployments"), - userId: v.id("users"), + deploymentId: v.id( "deployments" ), + userId: v.id( "users" ), // Diagram Content format: v.string(), // "svg" | "png" | "mermaid" @@ -406,144 +487,173 @@ const applicationTables = { // Metadata generatedAt: v.number(), - resourceCount: v.optional(v.number()), // Number of AWS resources in the diagram - diagramType: v.optional(v.string()), // "architecture" | "network" | "security" - }) - .index("by_deployment", ["deploymentId"]) - .index("by_user", ["userId", "generatedAt"]) - .index("by_deployment_and_format", ["deploymentId", "format"]), + resourceCount: v.optional( v.number() ), // Number of AWS resources in the diagram + diagramType: v.optional( v.string() ), // "architecture" | "network" | "security" + } ) + .index( "by_deployment", ["deploymentId"] ) + .index( "by_user", ["userId", "generatedAt"] ) + .index( "by_deployment_and_format", ["deploymentId", "format"] ), // Error Logs - errorLogs: defineTable({ + errorLogs: defineTable( { category: v.string(), // "oauth" | "mcp" | "agent" | "deployment" | "general" severity: v.string(), // "info" | "warning" | "error" | "critical" message: v.string(), - details: v.optional(v.any()), - userId: v.optional(v.id("users")), - stackTrace: v.optional(v.string()), - metadata: v.optional(v.object({ - provider: v.optional(v.string()), - serverName: v.optional(v.string()), - agentId: v.optional(v.string()), - deploymentId: v.optional(v.string()), - requestId: v.optional(v.string()), - })), + details: v.optional( v.any() ), + userId: v.optional( v.id( "users" ) ), + stackTrace: v.optional( v.string() ), + metadata: v.optional( v.object( { + provider: v.optional( v.string() ), + serverName: v.optional( v.string() ), + agentId: v.optional( v.string() ), + deploymentId: v.optional( v.string() ), + requestId: v.optional( v.string() ), + } ) ), timestamp: v.number(), resolved: v.boolean(), - resolvedAt: v.optional(v.number()), - resolution: v.optional(v.string()), - }) - .index("by_category", ["category", "timestamp"]) - .index("by_severity", ["severity", "timestamp"]) - .index("by_user", ["userId", "timestamp"]) - .index("by_resolved", ["resolved", "timestamp"]), + resolvedAt: v.optional( v.number() ), + resolution: v.optional( v.string() ), + } ) + .index( "by_category", ["category", "timestamp"] ) + .index( "by_severity", ["severity", "timestamp"] ) + .index( "by_user", ["userId", "timestamp"] ) + .index( "by_resolved", ["resolved", "timestamp"] ), // Audit Logs - auditLogs: defineTable({ + auditLogs: defineTable( { eventType: v.string(), // "oauth_login" | "mcp_invocation" | "agent_invocation" | "deployment_created" - userId: v.optional(v.id("users")), + userId: v.optional( v.id( "users" ) ), action: v.string(), - resource: v.optional(v.string()), - resourceId: v.optional(v.string()), + resource: v.optional( v.string() ), + resourceId: v.optional( v.string() ), success: v.boolean(), - details: v.optional(v.any()), - metadata: v.optional(v.object({ - provider: v.optional(v.string()), - serverName: v.optional(v.string()), - toolName: v.optional(v.string()), - agentId: v.optional(v.string()), - ipAddress: v.optional(v.string()), - userAgent: v.optional(v.string()), - })), + details: v.optional( v.any() ), + metadata: v.optional( v.object( { + provider: v.optional( v.string() ), + serverName: v.optional( v.string() ), + toolName: v.optional( v.string() ), + agentId: v.optional( v.string() ), + ipAddress: v.optional( v.string() ), + userAgent: v.optional( v.string() ), + } ) ), timestamp: v.number(), - }) - .index("by_event_type", ["eventType", "timestamp"]) - .index("by_user", ["userId", "timestamp"]) - .index("by_resource", ["resource", "resourceId"]) - .index("by_timestamp", ["timestamp"]), + } ) + .index( "by_event_type", ["eventType", "timestamp"] ) + .index( "by_user", ["userId", "timestamp"] ) + .index( "by_resource", ["resource", "resourceId"] ) + .index( "by_timestamp", ["timestamp"] ), + + // Agent Build Sessions (Automated Builder with Woz Questions) + agentBuildSessions: defineTable( { + userId: v.id( "users" ), + status: v.string(), // "gathering_requirements" | "ready_to_generate" | "completed" + currentQuestion: v.number(), + agentRequirements: v.object( { + agentType: v.union( v.string(), v.null() ), + targetUsers: v.union( v.string(), v.null() ), + problems: v.array( v.string() ), + tools: v.array( v.string() ), + tone: v.union( v.string(), v.null() ), + testingPreference: v.union( v.string(), v.null() ), + domainKnowledge: v.union( v.string(), v.null() ), + knowledgeBase: v.union( v.string(), v.null() ), + documentUrls: v.array( v.string() ), + } ), + conversationHistory: v.array( v.object( { + role: v.union( v.literal( "user" ), v.literal( "assistant" ) ), + content: v.string(), + reasoning: v.optional( v.string() ), + timestamp: v.number(), + } ) ), + generatedAgentConfig: v.optional( v.any() ), + createdAt: v.number(), + updatedAt: v.number(), + } ) + .index( "by_user", ["userId", "updatedAt"] ) + .index( "by_status", ["status", "updatedAt"] ), // Interleaved Reasoning Conversations - interleavedConversations: defineTable({ - userId: v.optional(v.id("users")), - conversationToken: v.optional(v.string()), // For anonymous users - agentId: v.optional(v.id("agents")), // Optional: Associate conversation with specific agent + interleavedConversations: defineTable( { + userId: v.optional( v.id( "users" ) ), + conversationToken: v.optional( v.string() ), // For anonymous users + agentId: v.optional( v.id( "agents" ) ), // Optional: Associate conversation with specific agent title: v.string(), systemPrompt: v.string(), - messages: v.optional(v.array(v.object({ - role: v.union(v.literal("user"), v.literal("assistant")), + messages: v.optional( v.array( v.object( { + role: v.union( v.literal( "user" ), v.literal( "assistant" ) ), content: v.string(), - reasoning: v.optional(v.string()), - toolCalls: v.optional(v.any()), + reasoning: v.optional( v.string() ), + toolCalls: v.optional( v.any() ), timestamp: v.number(), - }))), // DEPRECATED: Use interleavedMessages table instead - messageCount: v.optional(v.number()), // OPTIONAL: Computed on-demand from interleavedMessages count + } ) ) ), // DEPRECATED: Use interleavedMessages table instead + messageCount: v.optional( v.number() ), // OPTIONAL: Computed on-demand from interleavedMessages count contextSize: v.number(), // Size in bytes (approximate) - s3ContextKey: v.optional(v.string()), // S3 key for archived context + s3ContextKey: v.optional( v.string() ), // S3 key for archived context createdAt: v.number(), updatedAt: v.number(), isActive: v.boolean(), - }) - .index("by_user", ["userId", "updatedAt"]) - .index("by_token", ["conversationToken"]) - .index("by_agent", ["agentId", "updatedAt"]), + } ) + .index( "by_user", ["userId", "updatedAt"] ) + .index( "by_token", ["conversationToken"] ) + .index( "by_agent", ["agentId", "updatedAt"] ), // Interleaved Messages (one document per message for efficient writes) - interleavedMessages: defineTable({ - conversationId: v.id("interleavedConversations"), - role: v.union(v.literal("user"), v.literal("assistant")), + interleavedMessages: defineTable( { + conversationId: v.id( "interleavedConversations" ), + role: v.union( v.literal( "user" ), v.literal( "assistant" ) ), content: v.string(), - reasoning: v.optional(v.string()), // Claude's thinking process - toolCalls: v.optional(v.any()), + reasoning: v.optional( v.string() ), // Claude's thinking process + toolCalls: v.optional( v.any() ), timestamp: v.number(), sequenceNumber: v.number(), // For ordering messages - }) - .index("by_conversation", ["conversationId", "sequenceNumber"]) - .index("by_timestamp", ["conversationId", "timestamp"]), + } ) + .index( "by_conversation", ["conversationId", "sequenceNumber"] ) + .index( "by_timestamp", ["conversationId", "timestamp"] ), // Agent Memory store (Convex + S3 hybrid) - agentMemories: defineTable({ - agentId: v.optional(v.id("agents")), - conversationId: v.optional(v.id("interleavedConversations")), + agentMemories: defineTable( { + agentId: v.optional( v.id( "agents" ) ), + conversationId: v.optional( v.id( "interleavedConversations" ) ), memoryType: v.string(), - title: v.optional(v.string()), - summary: v.optional(v.string()), - content: v.optional(v.string()), - s3Key: v.optional(v.string()), - metadata: v.optional(v.any()), - tokenCount: v.optional(v.number()), + title: v.optional( v.string() ), + summary: v.optional( v.string() ), + content: v.optional( v.string() ), + s3Key: v.optional( v.string() ), + metadata: v.optional( v.any() ), + tokenCount: v.optional( v.number() ), createdAt: v.number(), - archived: v.optional(v.boolean()), - }) - .index("by_agent", ["agentId", "createdAt"]) - .index("by_conversation", ["conversationId", "createdAt"]) - .index("by_type", ["memoryType", "createdAt"]), + archived: v.optional( v.boolean() ), + } ) + .index( "by_agent", ["agentId", "createdAt"] ) + .index( "by_conversation", ["conversationId", "createdAt"] ) + .index( "by_type", ["memoryType", "createdAt"] ), // Dynamic Tools (Meta-tooling) - dynamicTools: defineTable({ + dynamicTools: defineTable( { // Identity name: v.string(), displayName: v.string(), description: v.string(), - userId: v.id("users"), - agentId: v.optional(v.id("agents")), // Agent that created this tool + userId: v.id( "users" ), + agentId: v.optional( v.id( "agents" ) ), // Agent that created this tool // Tool Code code: v.string(), // Python code with @tool decorator validated: v.boolean(), // Whether code passed syntax validation - validationError: v.optional(v.string()), + validationError: v.optional( v.string() ), // Tool Metadata parameters: v.any(), // JSON schema for tool parameters - returnType: v.optional(v.string()), - category: v.optional(v.string()), + returnType: v.optional( v.string() ), + category: v.optional( v.string() ), // Dependencies - pipPackages: v.optional(v.array(v.string())), - extrasPip: v.optional(v.string()), + pipPackages: v.optional( v.array( v.string() ) ), + extrasPip: v.optional( v.string() ), // Usage Tracking invocationCount: v.number(), - lastInvokedAt: v.optional(v.number()), + lastInvokedAt: v.optional( v.number() ), successCount: v.number(), errorCount: v.number(), @@ -553,17 +663,158 @@ const applicationTables = { // Status isActive: v.boolean(), - isPublic: v.optional(v.boolean()), - }) - .index("by_user", ["userId", "createdAt"]) - .index("by_agent", ["agentId"]) - .index("by_name", ["name"]) - .index("by_active", ["isActive"]) - .index("by_public", ["isPublic"]), + isPublic: v.optional( v.boolean() ), + } ) + .index( "by_user", ["userId", "createdAt"] ) + .index( "by_agent", ["agentId"] ) + .index( "by_name", ["name"] ) + .index( "by_active", ["isActive"] ) + .index( "by_public", ["isPublic"] ), + + // Rate Limiting System + // NOTE: The requests array is bounded to MAX_RATE_LIMIT_REQUESTS (200) entries. + // Writers (rateLimiter.ts) MUST trim old entries on every write to prevent + // unbounded growth. The sliding-window filter + slice enforces this cap. + rateLimitEntries: defineTable( { + userId: v.string(), + action: v.string(), + requests: v.array( v.number() ), // Timestamps of requests in current window (max 200) + blockedUntil: v.optional( v.number() ), // Timestamp when block expires + lastRequest: v.number(), // Timestamp of last request + } ) + .index( "by_user_action", ["userId", "action"] ) + .index( "by_user", ["userId"] ) + .index( "by_action", ["action"] ), + + // Multi-Agent Sessions (Swarm/Graph/Workflow patterns) + multiAgentSessions: defineTable( { + parentAgentId: v.id( "agents" ), + parentConversationId: v.optional( v.id( "interleavedConversations" ) ), + pattern: v.string(), // "swarm" | "graph" | "workflow" + executionMode: v.string(), // "parallel" | "sequential" | "mixed" + agentIds: v.array( v.id( "agents" ) ), + status: v.string(), // "running" | "completed" | "failed" + startedAt: v.number(), + completedAt: v.optional( v.number() ), + result: v.optional( v.any() ), + } ) + .index( "by_parent_agent", ["parentAgentId"] ) + .index( "by_parent_conversation", ["parentConversationId"] ) + .index( "by_pattern", ["pattern", "startedAt"] ) + .index( "by_status", ["status", "startedAt"] ), + + // Multi-Agent Results (individual agent outputs in multi-agent sessions) + multiAgentResults: defineTable( { + sessionId: v.id( "multiAgentSessions" ), + agentId: v.id( "agents" ), + conversationId: v.id( "interleavedConversations" ), + result: v.any(), + status: v.string(), // "running" | "completed" | "failed" + startedAt: v.number(), + completedAt: v.optional( v.number() ), + executionTime: v.optional( v.number() ), + error: v.optional( v.string() ), + } ) + .index( "by_session", ["sessionId", "completedAt"] ) + .index( "by_agent", ["agentId", "completedAt"] ) + .index( "by_status", ["status", "completedAt"] ), }; -export default defineSchema({ +/* ────────────────────────────────────────────────────────────── + * Shared Convex validators for workflow nodes and edges. + * Re-used by workflows, workflowTemplates, and workflowExecutions + * tables so the shape is defined exactly once (DRY). + * ────────────────────────────────────────────────────────────── */ +const workflowNodeValidator = v.object( { + id: v.string(), + type: v.optional( v.string() ), + position: v.optional( v.object( { x: v.number(), y: v.number() } ) ), + data: v.object( { + type: v.string(), + label: v.optional( v.string() ), + notes: v.optional( v.string() ), + config: v.any(), // Config shape varies per node kind; validated at runtime by sanitizeNode + } ), +} ); + +const workflowEdgeValidator = v.object( { + id: v.optional( v.string() ), + source: v.string(), + target: v.string(), + sourceHandle: v.optional( v.string() ), + targetHandle: v.optional( v.string() ), + label: v.optional( v.string() ), + type: v.optional( v.string() ), +} ); + +export default defineSchema( { + workflows: defineTable( { + name: v.string(), + userId: v.string(), // Auth identity token (subject/tokenIdentifier), not a Convex doc ID + templateId: v.string(), + nodes: v.array( workflowNodeValidator ), + edges: v.array( workflowEdgeValidator ), + status: v.string(), + createdAt: v.number(), + updatedAt: v.number(), + } ).index( "by_user", ["userId"] ).index( "by_template", ["templateId"] ), + + workflowExecutions: defineTable( { + workflowId: v.id( "workflows" ), + userId: v.string(), // Auth identity token (subject/tokenIdentifier), not a Convex doc ID + input: v.optional( v.object( { + userMessage: v.optional( v.string() ), + } ) ), + output: v.optional( v.object( { + success: v.boolean(), + result: v.optional( v.any() ), // execution results are genuinely polymorphic + error: v.optional( v.string() ), + executionTime: v.optional( v.number() ), + } ) ), + executionLog: v.array( v.object( { + nodeId: v.optional( v.string() ), + nodeType: v.optional( v.string() ), + nodeLabel: v.optional( v.string() ), + executionTime: v.optional( v.number() ), + result: v.optional( v.any() ), // node results vary per type + } ) ), + duration: v.number(), + status: v.string(), + createdAt: v.number(), + } ).index( "by_workflow", ["workflowId"] ).index( "by_user", ["userId"] ), + + // Workflow Templates (pre-built agent workflows) + workflowTemplates: defineTable( { + name: v.string(), + description: v.string(), + category: v.string(), + icon: v.string(), + difficulty: v.string(), + nodes: v.array( workflowNodeValidator ), + connections: v.array( workflowEdgeValidator ), + isOfficial: v.boolean(), + usageCount: v.number(), + createdAt: v.number(), + } ) + .index( "by_category", ["category"] ) + .index( "by_popularity", ["usageCount"] ) + .index( "by_official", ["isOfficial"] ), + + // Tool memory storage for memory tools (short-term, long-term, semantic) + toolMemory: defineTable( { + userId: v.string(), // Auth identity string (e.g. "anonymous"), not v.id("users") — see resolveUserId() in tools.ts + memoryType: v.string(), // "short_term" | "long_term" | "semantic" + key: v.string(), + value: v.string(), // JSON-stringified for safety + metadata: v.optional( v.string() ), + ttl: v.optional( v.number() ), + createdAt: v.number(), + updatedAt: v.number(), + } ) + .index( "by_key", ["userId", "memoryType", "key"] ) + .index( "by_type", ["userId", "memoryType"] ), + ...authTables, ...applicationTables, -}); +} ); diff --git a/convex/strandsAgentExecution.ts b/convex/strandsAgentExecution.ts index a6f508c..bf66fc4 100644 --- a/convex/strandsAgentExecution.ts +++ b/convex/strandsAgentExecution.ts @@ -8,6 +8,7 @@ import { v } from "convex/values"; import { api, internal } from "./_generated/api"; import type { ActionCtx } from "./_generated/server"; import type { Doc, Id } from "./_generated/dataModel"; +import { resolveBedrockModelId } from "./modelRegistry"; type AgentDoc = Doc<"agents">; @@ -38,6 +39,7 @@ interface AgentExecutionBase { error?: string; reasoning?: string; toolCalls?: ToolCall[]; + tokenUsage?: { inputTokens: number; outputTokens: number; totalTokens: number }; } type AgentExecutionSuccess = AgentExecutionBase & { @@ -63,39 +65,97 @@ type BedrockContentBlock = | { type: "text"; text: string } | { type: "thinking"; thinking: string } | { type: "tool_use"; id?: string; name?: string; input?: unknown } - | { type: string; [key: string]: unknown }; + | { type: string;[key: string]: unknown }; type BedrockInvokeResponse = { content?: BedrockContentBlock[]; + // Meta/Llama + generation?: string; + // Mistral + outputs?: Array<{ text?: string }>; + // Cohere + generations?: Array<{ text?: string }>; + // AI21 + completions?: Array<{ data?: { text?: string } }>; + // Amazon Titan + results?: Array<{ outputText?: string }>; }; -export const executeAgentWithStrandsAgents = action({ +export const executeAgentWithStrandsAgents = action( { args: { - agentId: v.id("agents"), - conversationId: v.optional(v.id("interleavedConversations")), + agentId: v.id( "agents" ), + conversationId: v.optional( v.id( "interleavedConversations" ) ), message: v.string(), }, - handler: async (ctx, args): Promise => { + handler: async ( ctx, args ): Promise => { try { - const agent = (await ctx.runQuery(internal.strandsAgentExecution.getAgentInternal, { + const agent = ( await ctx.runQuery( internal.strandsAgentExecution.getAgentInternal, { agentId: args.agentId, - })) as AgentDoc | null; + } ) ); - if (!agent) { - throw new Error("Agent not found"); + if ( !agent ) { + throw new Error( "Agent not found" ); + } + + // Model gating: Check if user's tier allows the agent's model provider + const { isProviderAllowedForTier, isBedrockModelAllowedForTier, getTierConfig } = await import( "./lib/tierConfig" ); + const agentOwner = await ctx.runQuery( internal.users.getInternal, { id: agent.createdBy } ); + const userTier = agentOwner?.tier || "freemium"; + + // Burst rate limit: enforce tier-aware per-minute ceiling + const { checkRateLimit, buildTierRateLimitConfig } = await import( "./rateLimiter" ); + const tierCfg = getTierConfig( userTier ); + const rlCfg = buildTierRateLimitConfig( tierCfg.maxConcurrentTests, "agentExecution" ); + const rlResult = await checkRateLimit( ctx, String( agent.createdBy ), "agentExecution", rlCfg ); + if ( !rlResult.allowed ) { + return { + success: false, + error: rlResult.reason || "Rate limit exceeded. Please wait before running more executions.", + }; + } + // Detect Bedrock: honor explicit deploymentType first, then fall back to + // model-ID pattern matching (Bedrock IDs use prefixes like "anthropic.", + // "anthropic.", "amazon.", "meta.", "mistral.", "cohere.", "ai21."). + const isBedrock = agent.deploymentType === "bedrock" + || ( !agent.deploymentType && /^(us\.|eu\.|apac\.|global\.)?(anthropic|amazon|meta|mistral|cohere|ai21)\./.test( agent.model ) ); + if ( isBedrock && !isProviderAllowedForTier( userTier, "bedrock" ) ) { + return { + success: false, + error: "Bedrock models require a Personal subscription ($5/month). " + + "Use local Ollama models for free, or upgrade in Settings → Billing.", + }; + } + if ( isBedrock && !isBedrockModelAllowedForTier( userTier, agent.model ) ) { + return { + success: false, + error: `Model ${agent.model} is not available on the ${userTier} tier. ` + + "Upgrade your subscription for access to this model.", + }; } let history: ConversationMessage[] = []; - if (args.conversationId) { - history = (await ctx.runQuery(internal.interleavedReasoning.getConversationHistory, { + if ( args.conversationId ) { + history = ( await ctx.runQuery( internal.interleavedReasoning.getConversationHistory, { conversationId: args.conversationId, windowSize: 10, - })) as ConversationMessage[]; + } ) ) as ConversationMessage[]; } - return await executeViaAgentCore(ctx, agent, args.message, history); - } catch (error: unknown) { - console.error("Agent execution error:", error); + const result = await executeViaAgentCore( ctx, agent, args.message, history ); + + // ─── Token-based metering ─────────────────────────────────────────── + if ( result.tokenUsage ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: agent.createdBy, + modelId: agent.model, + inputTokens: result.tokenUsage.inputTokens, + outputTokens: result.tokenUsage.outputTokens, + } ); + } + + return result; + } catch ( error: unknown ) { + console.error( "Agent execution error:", error ); const message = error instanceof Error ? error.message : "Agent execution failed"; return { success: false, @@ -103,7 +163,7 @@ export const executeAgentWithStrandsAgents = action({ }; } }, -}); +} ); async function executeViaAgentCore( ctx: ActionCtx, @@ -111,7 +171,7 @@ async function executeViaAgentCore( message: string, history: ConversationMessage[] ): Promise { - return await executeDirectBedrock(ctx, agent, message, history); + return await executeDirectBedrock( ctx, agent, message, history ); } async function executeDirectBedrock( @@ -121,82 +181,176 @@ async function executeDirectBedrock( history: ConversationMessage[] ): Promise { const { BedrockRuntimeClient, InvokeModelCommand } = - await import("@aws-sdk/client-bedrock-runtime"); + await import( "@aws-sdk/client-bedrock-runtime" ); - const client = new BedrockRuntimeClient({ + const client = new BedrockRuntimeClient( { region: process.env.AWS_REGION || "us-east-1", credentials: { accessKeyId: process.env.AWS_ACCESS_KEY_ID!, secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, }, - }); + } ); const messages: Array<{ role: string; content: Array<{ text: string }> }> = []; - for (const msg of history) { - messages.push({ + for ( const msg of history ) { + messages.push( { role: msg.role, content: [{ text: msg.content }], - }); + } ); } - messages.push({ + messages.push( { role: "user", content: [{ text: message }], - }); - - let modelId = agent.model; - if (!modelId.includes(":") && !modelId.startsWith("us.") && !modelId.startsWith("anthropic.")) { - const modelMap: Record = { - "claude-3-5-sonnet-20241022": "us.anthropic.claude-3-5-sonnet-20241022-v2:0", - "claude-3-5-haiku-20241022": "us.anthropic.claude-3-5-haiku-20241022-v1:0", - "claude-3-opus-20240229": "anthropic.claude-3-opus-20240229-v1:0", + } ); + + const modelId = resolveBedrockModelId( agent.model ); + + // Branch payload format by provider: Anthropic Messages API vs generic Bedrock + const isAnthropicModel = modelId.includes( "anthropic" ) || modelId.includes( "claude" ); + let payload: Record; + + if ( isAnthropicModel ) { + payload = { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 4096, + system: agent.systemPrompt, + messages: messages, + temperature: 1, + thinking: { + type: "enabled", + budget_tokens: 3000, + }, }; - modelId = modelMap[agent.model] || "us.anthropic.claude-3-5-haiku-20241022-v1:0"; + } else { + // Non-Anthropic Bedrock models (Llama, Mistral, etc.) use a plain + // prompt-based payload compatible with InvokeModelCommand. + const promptText = messages.map( + ( m: { role: string; content: Array<{ text: string }> } ) => + `${m.role}: ${m.content.map( ( c ) => c.text ).join( "" )}` + ).join( "\n" ); + const systemPrefix = agent.systemPrompt ? `system: ${agent.systemPrompt}\n` : ""; + + if ( modelId.includes( "meta" ) || modelId.includes( "llama" ) ) { + payload = { + prompt: `${systemPrefix}${promptText}\nassistant:`, + max_gen_len: 4096, + temperature: 0.7, + }; + } else if ( modelId.includes( "mistral" ) ) { + payload = { + prompt: `[INST] ${systemPrefix}${promptText} [/INST]`, + max_tokens: 4096, + temperature: 0.7, + }; + } else { + // Generic Bedrock model fallback (Cohere, AI21, Titan, etc.) + // Amazon Titan requires a different payload shape: use `inputText` + // and wrap generation options in `textGenerationConfig` with + // `maxTokenCount` (replacing `max_tokens`) and `temperature`. + const isTitan = modelId.toLowerCase().includes( "titan" ); + + if ( isTitan ) { + // Titan-compatible payload + payload = { + inputText: `${systemPrefix}${promptText}`, + textGenerationConfig: { + maxTokenCount: 4096, + temperature: 0.7, + }, + }; + } else { + // Existing prompt/max_tokens shape for other Bedrock providers + payload = { + prompt: `${systemPrefix}${promptText}\nassistant:`, + max_tokens: 4096, + temperature: 0.7, + }; + } + } } - const payload = { - anthropic_version: "bedrock-2023-05-31", - max_tokens: 4096, - system: agent.systemPrompt, - messages: messages, - temperature: 1, - thinking: { - type: "enabled", - budget_tokens: 3000, - }, - }; - - const command = new InvokeModelCommand({ + const command = new InvokeModelCommand( { modelId: modelId, contentType: "application/json", accept: "application/json", - body: JSON.stringify(payload), - }); + body: JSON.stringify( payload ), + } ); - const response = await client.send(command); + const response = await client.send( command ); const responseBody = JSON.parse( - new TextDecoder().decode(response.body) + new TextDecoder().decode( response.body ) ) as BedrockInvokeResponse; let content = ""; let reasoning = ""; const toolCalls: ToolCall[] = []; - for (const block of responseBody.content || []) { - if (block.type === "text") { - content += block.text; - } else if (block.type === "thinking") { - reasoning += block.thinking; - } else if (block.type === "tool_use") { - const id = typeof block.id === "string" ? block.id : undefined; - const name = typeof block.name === "string" ? block.name : undefined; - toolCalls.push({ - id, - name, - input: block.input, - }); + if ( responseBody.content && Array.isArray( responseBody.content ) ) { + // Anthropic models: content is an array of typed blocks + for ( const block of responseBody.content ) { + if ( block.type === "text" ) { + content += block.text; + } else if ( block.type === "thinking" ) { + reasoning += block.thinking; + } else if ( block.type === "tool_use" ) { + const id = typeof block.id === "string" ? block.id : undefined; + const name = typeof block.name === "string" ? block.name : undefined; + toolCalls.push( { + id, + name, + input: block.input, + } ); + } } + } else if ( typeof responseBody.generation === "string" ) { + // Meta/Llama models: single generation string + content = responseBody.generation; + } else if ( responseBody.outputs && Array.isArray( responseBody.outputs ) ) { + // Mistral models: outputs array with text fields + content = responseBody.outputs.map( ( o ) => o.text || "" ).join( "" ); + } else if ( responseBody.generations && Array.isArray( responseBody.generations ) ) { + // Cohere models: generations array + content = responseBody.generations.map( ( g ) => g.text || "" ).join( "" ); + } else if ( responseBody.completions && Array.isArray( responseBody.completions ) ) { + // AI21 models: completions array + content = responseBody.completions.map( ( c ) => c.data?.text || "" ).join( "" ); + } else if ( responseBody.results && Array.isArray( responseBody.results ) ) { + // Amazon Titan models: results array + content = responseBody.results.map( ( r ) => r.outputText || "" ).join( "" ); + } else { + // Fallback: try to extract text from any field in the response + // Use the already-parsed `responseBody` and avoid logging raw/sensitive content. + console.warn( `Unrecognized Bedrock response format for model ${modelId}. Response did not match expected fields.` ); + try { + if ( typeof responseBody === "string" ) { + const parsed = JSON.parse( responseBody ); + content = typeof parsed === "string" ? parsed : JSON.stringify( parsed ); + } else if ( responseBody && typeof responseBody === "object" ) { + // Preserve a JSON representation of the object as the fallback content + content = JSON.stringify( responseBody ); + } else { + content = String( responseBody ); + } + } catch { + // If JSON.parse fails for some reason, fall back to a best-effort string + try { + content = JSON.stringify( responseBody ); + } catch { + content = String( responseBody ); + } + } + } + + // ─── Token extraction for billing ─────────────────────────────────────── + const { extractTokenUsage, estimateTokenUsage } = await import( "./lib/tokenBilling" ); + let tokenUsage = extractTokenUsage( responseBody, modelId ); + + // Fallback: estimate from text when provider doesn't return counts + if ( tokenUsage.totalTokens === 0 ) { + const inputText = JSON.stringify( payload ); + tokenUsage = estimateTokenUsage( inputText, content ); } return { @@ -204,6 +358,7 @@ async function executeDirectBedrock( content: content.trim(), reasoning: reasoning.trim() || undefined, toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + tokenUsage, metadata: { model: modelId, modelProvider: "bedrock", @@ -212,22 +367,22 @@ async function executeDirectBedrock( }; } -export const getAgentInternal = internalQuery({ +export const getAgentInternal = internalQuery( { args: { - agentId: v.id("agents"), + agentId: v.id( "agents" ), }, - handler: async (ctx, args): Promise => { - const agent = (await ctx.db.get(args.agentId)) as AgentDoc | null; + handler: async ( ctx, args ): Promise => { + const agent = ( await ctx.db.get( args.agentId ) ); return agent; }, -}); +} ); -export const testAgentExecution = action({ +export const testAgentExecution = action( { args: { - agentId: v.id("agents"), - testMessage: v.optional(v.string()), + agentId: v.id( "agents" ), + testMessage: v.optional( v.string() ), }, - handler: async (ctx, args): Promise< + handler: async ( ctx, args ): Promise< AgentExecutionResult & { testMessage: string; conversationId: Id<"interleavedConversations">; @@ -235,16 +390,16 @@ export const testAgentExecution = action({ > => { const testMessage = args.testMessage || "Hello! Please introduce yourself and list your available tools."; - const conversation = (await ctx.runMutation(api.interleavedReasoning.createConversation, { + const conversation = ( await ctx.runMutation( api.interleavedReasoning.createConversation, { title: "Agent Test", systemPrompt: "Test conversation", - })) as ConversationCreateResult; + } ) ) as ConversationCreateResult; - const result = (await ctx.runAction(api.strandsAgentExecution.executeAgentWithStrandsAgents, { + const result = ( await ctx.runAction( api.strandsAgentExecution.executeAgentWithStrandsAgents, { agentId: args.agentId, conversationId: conversation.conversationId, message: testMessage, - })) as AgentExecutionResult; + } ) ); return { ...result, @@ -252,4 +407,4 @@ export const testAgentExecution = action({ conversationId: conversation.conversationId, }; }, -}); +} ); diff --git a/convex/strandsAgentExecutionDynamic.ts b/convex/strandsAgentExecutionDynamic.ts new file mode 100644 index 0000000..1605d74 --- /dev/null +++ b/convex/strandsAgentExecutionDynamic.ts @@ -0,0 +1,409 @@ +/** + * Strands Agents Execution with Dynamic Model Switching + * Automatically selects the best model based on conversation complexity + */ + +import { action, internalQuery } from "./_generated/server"; +import { v } from "convex/values"; +import { api, internal } from "./_generated/api"; +import type { ActionCtx } from "./_generated/server"; +import type { Doc, Id } from "./_generated/dataModel"; +import { + analyzeComplexity, + calculateComplexityScore, + selectModel, + decideModelSwitch, + type ModelSwitchDecision, +} from "./lib/dynamicModelSwitching"; +import { resolveBedrockModelId } from "./modelRegistry"; + +type AgentDoc = Doc<"agents">; + +type ConversationMessage = { + role: "user" | "assistant" | "system" | "tool"; + content: string; + timestamp: number; + reasoning?: string; + toolCalls?: unknown; +}; + +type ToolCall = { + id?: string; + name?: string; + input?: unknown; +}; + +type AgentExecutionMetadata = { + model: string; + modelProvider: string; + executionMethod: "agentcore" | "direct-bedrock-api"; + modelSwitchDecision?: ModelSwitchDecision; + originalModel?: string; +}; + +interface AgentExecutionBase { + success: boolean; + metadata?: AgentExecutionMetadata; + content?: string; + error?: string; + reasoning?: string; + toolCalls?: ToolCall[]; + tokenUsage?: { inputTokens: number; outputTokens: number; totalTokens: number }; +} + +type AgentExecutionSuccess = AgentExecutionBase & { + success: true; + content: string; + reasoning?: string; + toolCalls?: ToolCall[]; +}; + +type AgentExecutionFailure = AgentExecutionBase & { + success: false; + error: string; +}; + +type AgentExecutionResult = AgentExecutionSuccess | AgentExecutionFailure; + +type BedrockContentBlock = + | { type: "text"; text: string } + | { type: "thinking"; thinking: string } + | { type: "tool_use"; id?: string; name?: string; input?: unknown } + | { type: string;[key: string]: unknown }; + +type BedrockInvokeResponse = { + content?: BedrockContentBlock[]; + // Meta/Llama + generation?: string; + // Mistral + outputs?: Array<{ text?: string }>; + // Cohere + generations?: Array<{ text?: string }>; + // AI21 + completions?: Array<{ data?: { text?: string } }>; + // Amazon Titan + results?: Array<{ outputText?: string }>; +}; + +/** + * Execute agent with dynamic model switching + */ +export const executeAgentWithDynamicModel = action( { + args: { + agentId: v.id( "agents" ), + conversationId: v.optional( v.id( "interleavedConversations" ) ), + message: v.string(), + enableModelSwitching: v.optional( v.boolean() ), + preferCost: v.optional( v.boolean() ), + preferSpeed: v.optional( v.boolean() ), + preferCapability: v.optional( v.boolean() ), + }, + handler: async ( ctx, args ): Promise => { + try { + const agent = ( await ctx.runQuery( internal.strandsAgentExecution.getAgentInternal, { + agentId: args.agentId, + } ) ); + + if ( !agent ) { + throw new Error( "Agent not found" ); + } + + // Get conversation history + let history: ConversationMessage[] = []; + if ( args.conversationId ) { + history = ( await ctx.runQuery( internal.interleavedReasoning.getConversationHistory, { + conversationId: args.conversationId, + windowSize: 10, + } ) ) as ConversationMessage[]; + } + + // Get user tier for model switching decisions + const user = await ctx.runQuery( internal.users.getInternal, { id: agent.createdBy } ); + const userTier = ( user?.tier as "freemium" | "personal" | "enterprise" ) || "freemium"; + + // Burst rate limit: enforce tier-aware per-minute ceiling + const { checkRateLimit, buildTierRateLimitConfig } = await import( "./rateLimiter" ); + const { isProviderAllowedForTier, getTierConfig: getTierCfg } = await import( "./lib/tierConfig" ); + const tierCfg = getTierCfg( userTier ); + const rlCfg = buildTierRateLimitConfig( tierCfg.maxConcurrentTests, "agentExecution" ); + const rlResult = await checkRateLimit( ctx, String( agent.createdBy ), "agentExecution", rlCfg ); + if ( !rlResult.allowed ) { + return { + success: false, + error: rlResult.reason || "Rate limit exceeded. Please wait before running more executions.", + }; + } + + // Model gating: Block freemium users from Bedrock models. + // Positively detect Ollama models so Bedrock IDs with colons (e.g. + // "anthropic.claude-haiku-4-5-20251001-v1:0") are not misclassified. + const isOllamaModel = agent.deploymentType === "ollama" + || agent.model.toLowerCase().includes( "ollama" ) + || ( !agent.deploymentType && !agent.model.includes( "." ) && agent.model.includes( ":" ) ); + const isBedrock = !isOllamaModel; + if ( isBedrock && !isProviderAllowedForTier( userTier, "bedrock" ) ) { + return { + success: false, + error: "Bedrock models require a Personal subscription ($5/month). " + + "Use local Ollama models for free, or upgrade in Settings → Billing.", + }; + } + + // Execute with or without dynamic model switching + let result: AgentExecutionResult; + if ( args.enableModelSwitching === false ) { + result = await executeDirectBedrock( ctx, agent, args.message, history ); + } else { + result = await executeWithModelSwitching( ctx, agent, args.message, history, { + preferCost: args.preferCost, + preferSpeed: args.preferSpeed, + preferCapability: args.preferCapability, + userTier, + } ); + } + + // ─── Token-based metering ─────────────────────────────────────────── + if ( result.tokenUsage ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: agent.createdBy, + modelId: agent.model, + inputTokens: result.tokenUsage.inputTokens, + outputTokens: result.tokenUsage.outputTokens, + } ); + } + + return result; + } catch ( error: unknown ) { + console.error( "Agent execution error:", error ); + const message = error instanceof Error ? error.message : "Agent execution failed"; + return { + success: false, + error: message, + }; + } + }, +} ); + +/** + * Execute with dynamic model switching + */ +async function executeWithModelSwitching( + ctx: ActionCtx, + agent: AgentDoc, + message: string, + history: ConversationMessage[], + options: { + preferCost?: boolean; + preferSpeed?: boolean; + preferCapability?: boolean; + userTier: "freemium" | "personal" | "enterprise"; + } +): Promise { + // Convert conversation history to simple format for analysis + const historyForAnalysis = history.map( ( msg ) => ( { + role: msg.role, + content: msg.content, + } ) ); + + // Make model switching decision + const decision = decideModelSwitch( message, historyForAnalysis, agent, options ); + + console.log( `[ModelSwitcher] Complexity: ${decision.complexityScore}/100` ); + console.log( `[ModelSwitcher] Selected: ${decision.selectedModel.name}` ); + console.log( `[ModelSwitcher] Reasoning: ${decision.reasoning}` ); + console.log( `[ModelSwitcher] Estimated cost: $${decision.estimatedCost.toFixed( 4 )}` ); + + // Execute with selected model + const result = await executeDirectBedrock( + ctx, + agent, + message, + history, + decision.selectedModel.modelId + ); + + // Add decision metadata to result + if ( result.success && result.metadata ) { + result.metadata.modelSwitchDecision = decision; + result.metadata.originalModel = agent.model; + } + + return result; +} + +/** + * Execute via direct Bedrock API + */ +async function executeDirectBedrock( + ctx: ActionCtx, + agent: AgentDoc, + message: string, + history: ConversationMessage[], + overrideModelId?: string +): Promise { + const { BedrockRuntimeClient, InvokeModelCommand } = await import( + "@aws-sdk/client-bedrock-runtime" + ); + + const client = new BedrockRuntimeClient( { + region: process.env.AWS_REGION || "us-east-1", + credentials: { + accessKeyId: process.env.AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, + }, + } ); + + const messages: Array<{ role: string; content: Array<{ text: string }> }> = []; + + for ( const msg of history ) { + // Only include user and assistant roles in messages array; + // system messages go to top-level system field, tool messages are skipped + if ( msg.role === "user" || msg.role === "assistant" ) { + messages.push( { + role: msg.role, + content: [{ text: msg.content }], + } ); + } + } + + messages.push( { + role: "user", + content: [{ text: message }], + } ); + + // Use override model if provided, otherwise use agent's model + const modelId = resolveBedrockModelId( overrideModelId || agent.model ); + + // Only include Claude/Anthropic-specific fields when using an Anthropic model + const isAnthropicModel = modelId.includes( "anthropic" ) || modelId.includes( "claude" ); + + // Branch payload format by provider: Anthropic Messages API vs generic Bedrock + let payload: Record; + + if ( isAnthropicModel ) { + payload = { + anthropic_version: "bedrock-2023-05-31", + max_tokens: 4096, + system: agent.systemPrompt, + messages: messages, + temperature: 1, + thinking: { + type: "enabled", + budget_tokens: 3000, + }, + }; + } else { + // Non-Anthropic Bedrock models (Llama, Mistral, etc.) use a plain + // prompt-based payload compatible with InvokeModelCommand. + const promptText = messages.map( + ( m: { role: string; content: Array<{ text: string }> } ) => + `${m.role}: ${m.content.map( ( c ) => c.text ).join( "" )}` + ).join( "\n" ); + const systemPrefix = agent.systemPrompt ? `system: ${agent.systemPrompt}\n` : ""; + + if ( modelId.includes( "meta" ) || modelId.includes( "llama" ) ) { + // Meta Llama format + payload = { + prompt: `${systemPrefix}${promptText}\nassistant:`, + max_gen_len: 4096, + temperature: 0.7, + }; + } else if ( modelId.includes( "mistral" ) ) { + // Mistral format + payload = { + prompt: `[INST] ${systemPrefix}${promptText} [/INST]`, + max_tokens: 4096, + temperature: 0.7, + }; + } else { + // Generic Bedrock model fallback (Cohere, AI21, etc.) + payload = { + prompt: `${systemPrefix}${promptText}\nassistant:`, + max_tokens: 4096, + temperature: 0.7, + }; + } + } + + const command = new InvokeModelCommand( { + modelId: modelId, + contentType: "application/json", + accept: "application/json", + body: JSON.stringify( payload ), + } ); + + const response = await client.send( command ); + const responseBody = JSON.parse( new TextDecoder().decode( response.body ) ) as BedrockInvokeResponse; + + let content = ""; + let reasoning = ""; + const toolCalls: ToolCall[] = []; + + if ( responseBody.content && Array.isArray( responseBody.content ) ) { + // Anthropic models: content is an array of typed blocks + for ( const block of responseBody.content ) { + if ( block.type === "text" ) { + content += block.text; + } else if ( block.type === "thinking" ) { + reasoning += block.thinking; + } else if ( block.type === "tool_use" ) { + const id = typeof block.id === "string" ? block.id : undefined; + const name = typeof block.name === "string" ? block.name : undefined; + toolCalls.push( { + id, + name, + input: block.input, + } ); + } + } + } else if ( typeof responseBody.generation === "string" ) { + // Meta/Llama models: single generation string + content = responseBody.generation; + } else if ( responseBody.outputs && Array.isArray( responseBody.outputs ) ) { + // Mistral models: outputs array with text fields + content = responseBody.outputs.map( ( o: any ) => o.text || "" ).join( "" ); + } else if ( responseBody.generations && Array.isArray( responseBody.generations ) ) { + // Cohere models: generations array + content = responseBody.generations.map( ( g: any ) => g.text || "" ).join( "" ); + } else if ( responseBody.completions && Array.isArray( responseBody.completions ) ) { + // AI21 models: completions array + content = responseBody.completions.map( ( c: any ) => c.data?.text || "" ).join( "" ); + } else if ( responseBody.results && Array.isArray( responseBody.results ) ) { + // Amazon Titan models: results array + content = responseBody.results.map( ( r: any ) => r.outputText || "" ).join( "" ); + } else { + // Fallback: try to extract text from any string field in the response + const raw = new TextDecoder().decode( response.body ); + console.warn( `Unrecognized Bedrock response format for model ${modelId}. Raw preview: ${raw.slice( 0, 200 )}` ); + // Attempt to use the raw body as text if it looks like a plain string + try { + const parsed = JSON.parse( raw ); + content = typeof parsed === "string" ? parsed : JSON.stringify( parsed ); + } catch { + content = raw; + } + } + + // ─── Token extraction for billing ─────────────────────────────────────── + const { extractTokenUsage, estimateTokenUsage } = await import( "./lib/tokenBilling" ); + let tokenUsage = extractTokenUsage( responseBody, modelId ); + + // Fallback: estimate from text when provider doesn't return counts + if ( tokenUsage.totalTokens === 0 ) { + const inputText = JSON.stringify( payload ); + tokenUsage = estimateTokenUsage( inputText, content ); + } + + return { + success: true, + content: content.trim(), + reasoning: reasoning.trim() || undefined, + toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + tokenUsage, + metadata: { + model: modelId, + modelProvider: "bedrock", + executionMethod: "direct-bedrock-api", + }, + }; +} diff --git a/convex/strandsAgentsTools.ts b/convex/strandsAgentsTools.ts new file mode 100644 index 0000000..dbd4e43 --- /dev/null +++ b/convex/strandsAgentsTools.ts @@ -0,0 +1,1294 @@ +/** + * StrandsAgents Tool Definitions + * + * These are the actual functional components that power the visual scripting system. + * Each tool is an "Agent as a Tool" that can be composed together. + */ + +import { v } from "convex/values"; + +/** + * CHATBOT COMPONENTS + */ + +export const conversationalChatbotTool = { + name: "conversational_chatbot", + type: "chatbot_core", + description: "Interactive chatbot with memory and reasoning capabilities", + icon: "💬", + color: "#3B82F6", // blue + category: "Chatbot Components", + + parameters: v.object({ + agentName: v.string(), + personality: v.string(), + memoryType: v.union(v.literal("short_term"), v.literal("long_term"), v.literal("hybrid")), + contextWindow: v.number(), + systemPrompt: v.string(), + tools: v.array(v.string()), + }), + + inputs: ["user_message", "conversation_history"], + outputs: ["response", "updated_context", "reasoning"], + + // This will be implemented in the agent code generator + strandsAgentCode: ` +@agent +class ConversationalChatbot: + def __init__(self, config): + self.name = config['agentName'] + self.personality = config['personality'] + self.memory = MemoryManager(config['memoryType']) + self.context_window = config['contextWindow'] + self.system_prompt = config['systemPrompt'] + + @tool + def process_message(self, user_message: str, conversation_history: list) -> dict: + # Retrieve relevant context + context = self.memory.retrieve(user_message, self.context_window) + + # Build prompt with context + prompt = f"{self.system_prompt}\\n\\nContext: {context}\\n\\nUser: {user_message}" + + # Process with reasoning + response = self.invoke_model(prompt) + + # Update memory + self.memory.store(user_message, response) + + return { + "response": response, + "context": context, + "reasoning": self.get_reasoning() + } + `, +}; + +export const chatbotWorkerTool = { + name: "chatbot_worker", + type: "specialized_agent", + description: "Specialized worker agent for specific domains", + icon: "🤖", + color: "#EF4444", // red + category: "Chatbot Components", + + parameters: v.object({ + workerName: v.string(), + domain: v.string(), // "customer_support", "technical", "research" + expertise: v.array(v.string()), + systemPrompt: v.string(), + tools: v.array(v.string()), + }), + + inputs: ["task", "context"], + outputs: ["result", "confidence", "next_actions"], + + strandsAgentCode: ` +@agent +class ChatbotWorker: + def __init__(self, config): + self.name = config['workerName'] + self.domain = config['domain'] + self.expertise = config['expertise'] + self.system_prompt = config['systemPrompt'] + + @tool + def execute_task(self, task: str, context: dict) -> dict: + # Validate task is within expertise + if not self.can_handle(task): + return {"error": "Task outside expertise", "should_route": True} + + # Execute specialized logic + result = self.process_with_domain_knowledge(task, context) + + return { + "result": result, + "confidence": self.calculate_confidence(), + "next_actions": self.suggest_follow_ups() + } + `, +}; + +export const chatbotMemoryTool = { + name: "chatbot_memory", + type: "context_storage", + description: "Memory management with context retrieval", + icon: "🧠", + color: "#8B5CF6", // purple + category: "Chatbot Components", + + parameters: v.object({ + storageType: v.union(v.literal("convex"), v.literal("s3"), v.literal("hybrid")), + retrievalStrategy: v.union(v.literal("similarity"), v.literal("recency"), v.literal("importance")), + maxContextSize: v.number(), + compressionEnabled: v.boolean(), + }), + + inputs: ["query", "conversation_id"], + outputs: ["relevant_context", "metadata"], + + strandsAgentCode: ` +@tool +def retrieve_memory(query: str, conversation_id: str, config: dict) -> dict: + memory_store = MemoryStore(config['storageType']) + + # Retrieve based on strategy + if config['retrievalStrategy'] == 'similarity': + context = memory_store.similarity_search(query, conversation_id) + elif config['retrievalStrategy'] == 'recency': + context = memory_store.get_recent(conversation_id, limit=config['maxContextSize']) + else: + context = memory_store.importance_ranked(query, conversation_id) + + # Compress if needed + if config['compressionEnabled'] and len(context) > config['maxContextSize']: + context = compress_context(context, config['maxContextSize']) + + return { + "relevant_context": context, + "metadata": {"source": "memory", "relevance_score": calculate_relevance(query, context)} + } + `, +}; + +/** + * PROMPT MANAGEMENT TOOLS + */ + +export const promptTemplateTool = { + name: "prompt_template", + type: "reusable_prompt", + description: "Reusable prompt template with variables", + icon: "📝", + color: "#10B981", // green + category: "Prompt Tools", + + parameters: v.object({ + templateName: v.string(), + template: v.string(), // "Hello {name}, how can I help with {topic}?" + variables: v.array(v.object({ + name: v.string(), + type: v.string(), + default: v.optional(v.string()), + required: v.boolean(), + })), + examples: v.array(v.object({ + input: v.any(), + output: v.string(), + })), + }), + + inputs: ["variable_values"], + outputs: ["rendered_prompt"], + + strandsAgentCode: ` +@tool +def render_prompt_template(template: str, variables: dict, variable_values: dict) -> str: + rendered = template + + # Validate required variables + required_vars = [v['name'] for v in variables if v['required']] + missing = [v for v in required_vars if v not in variable_values] + if missing: + raise ValueError(f"Missing required variables: {missing}") + + # Replace variables + for var_name, var_value in variable_values.items(): + placeholder = f"{{{var_name}}}" + rendered = rendered.replace(placeholder, str(var_value)) + + return rendered + `, +}; + +export const promptChainTool = { + name: "prompt_chain", + type: "sequential_prompts", + description: "Chain multiple prompts sequentially", + icon: "⛓️", + color: "#F59E0B", // amber + category: "Prompt Tools", + + parameters: v.object({ + chainName: v.string(), + prompts: v.array(v.object({ + id: v.string(), + template: v.string(), + extractOutput: v.string(), // JSONPath or regex to extract from response + })), + passThroughContext: v.boolean(), + }), + + inputs: ["initial_input"], + outputs: ["final_output", "intermediate_results"], + + strandsAgentCode: ` +@tool +def execute_prompt_chain(prompts: list, initial_input: dict, config: dict) -> dict: + context = initial_input + intermediate_results = [] + + for prompt_config in prompts: + # Render prompt with current context + prompt = render_template(prompt_config['template'], context) + + # Execute + response = invoke_model(prompt) + + # Extract output + extracted = extract_value(response, prompt_config['extractOutput']) + + intermediate_results.append({ + "prompt_id": prompt_config['id'], + "response": response, + "extracted": extracted + }) + + # Update context for next prompt + if config['passThroughContext']: + context.update(extracted) + + return { + "final_output": intermediate_results[-1]['extracted'], + "intermediate_results": intermediate_results + } + `, +}; + +export const thoughtBuilderTool = { + name: "thought_builder", + type: "reasoning_prompt", + description: "Build explicit reasoning chains (Chain-of-Thought)", + icon: "🧠", + color: "#EC4899", // pink + category: "Prompt Tools", + + parameters: v.object({ + thoughtName: v.string(), + steps: v.array(v.object({ + name: v.string(), + instruction: v.string(), + requiresEvidence: v.boolean(), + })), + enableSelfCorrection: v.boolean(), + }), + + inputs: ["problem"], + outputs: ["reasoning_chain", "final_answer", "confidence"], + + strandsAgentCode: ` +@tool +def build_thought_chain(problem: str, steps: list, config: dict) -> dict: + reasoning_chain = [] + current_understanding = problem + + for step in steps: + # Generate reasoning for this step + thought_prompt = f"{step['instruction']}\\n\\nProblem: {current_understanding}" + + if step['requiresEvidence']: + thought_prompt += "\\n\\nProvide evidence for your reasoning." + + step_reasoning = invoke_model(thought_prompt) + reasoning_chain.append({ + "step": step['name'], + "reasoning": step_reasoning + }) + + # Update understanding + current_understanding = extract_conclusion(step_reasoning) + + # Self-correction loop + if config['enableSelfCorrection']: + confidence = evaluate_reasoning_quality(reasoning_chain) + if confidence < 0.8: + # Re-run with corrections + corrections = identify_weak_reasoning(reasoning_chain) + reasoning_chain = apply_corrections(reasoning_chain, corrections) + + return { + "reasoning_chain": reasoning_chain, + "final_answer": current_understanding, + "confidence": calculate_confidence(reasoning_chain) + } + `, +}; + +/** + * CONNECTOR TOOLS + */ + +export const mlConnectorTool = { + name: "ml_connector", + type: "model_interface", + description: "Connect to ML models (Bedrock, OpenAI, Ollama)", + icon: "🔮", + color: "#6366F1", // indigo + category: "Connectors", + + parameters: v.object({ + provider: v.union(v.literal("bedrock"), v.literal("openai"), v.literal("ollama")), + model: v.string(), + temperature: v.number(), + maxTokens: v.number(), + trackCost: v.boolean(), + enableCaching: v.boolean(), + }), + + inputs: ["prompt", "system_prompt"], + outputs: ["response", "usage", "cost"], + + strandsAgentCode: ` +@tool +def invoke_ml_model(prompt: str, system_prompt: str, config: dict) -> dict: + provider = get_provider(config['provider']) + + # Build request + request = { + "model": config['model'], + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ], + "temperature": config['temperature'], + "max_tokens": config['maxTokens'] + } + + # Check cache + if config['enableCaching']: + cached = check_cache(request) + if cached: + return cached + + # Invoke model + response = provider.invoke(request) + + # Track cost + cost = 0 + if config['trackCost']: + cost = calculate_cost(config['model'], response['usage']) + + # Cache response + if config['enableCaching']: + cache_response(request, response) + + return { + "response": response['content'], + "usage": response['usage'], + "cost": cost + } + `, +}; + +export const dataConnectorTool = { + name: "data_connector", + type: "data_source", + description: "Connect to data sources (DB, API, Files)", + icon: "🗄️", + color: "#14B8A6", // teal + category: "Connectors", + + parameters: v.object({ + sourceType: v.union(v.literal("database"), v.literal("api"), v.literal("file"), v.literal("stream")), + connectionConfig: v.any(), + queryTemplate: v.optional(v.string()), + transformations: v.array(v.string()), + }), + + inputs: ["query_params"], + outputs: ["data", "metadata"], + + strandsAgentCode: ` +@tool +def fetch_data(query_params: dict, config: dict) -> dict: + connector = create_connector(config['sourceType'], config['connectionConfig']) + + # Build query + if config.get('queryTemplate'): + query = render_template(config['queryTemplate'], query_params) + else: + query = query_params + + # Fetch data + raw_data = connector.fetch(query) + + # Apply transformations + transformed_data = raw_data + for transform in config['transformations']: + transformed_data = apply_transformation(transformed_data, transform) + + return { + "data": transformed_data, + "metadata": { + "source": config['sourceType'], + "record_count": len(transformed_data), + "timestamp": get_timestamp() + } + } + `, +}; + +/** + * REASONING PATTERN TOOLS + */ + +export const chainOfThoughtTool = { + name: "chain_of_thought", + type: "sequential_reasoning", + description: "Step-by-step reasoning with explicit thinking", + icon: "🧠", + color: "#A855F7", // violet + category: "Reasoning Patterns", + + parameters: v.object({ + enableThinking: v.boolean(), + thinkingBudget: v.number(), // tokens for thinking + requireEvidence: v.boolean(), + showReasoning: v.boolean(), + }), + + inputs: ["problem"], + outputs: ["reasoning", "answer", "confidence"], + + strandsAgentCode: ` +@tool +def chain_of_thought_reasoning(problem: str, config: dict) -> dict: + prompt = f"""Let's solve this step by step. + +Problem: {problem} + +{"Think through each step carefully and provide evidence for your reasoning." if config['requireEvidence'] else "Think through each step carefully."} + +Step 1:""" + + # Invoke with extended thinking + response = invoke_model( + prompt, + thinking_enabled=config['enableThinking'], + thinking_budget=config['thinkingBudget'] + ) + + # Extract reasoning steps + reasoning_steps = parse_reasoning_steps(response) + final_answer = extract_final_answer(response) + confidence = calculate_confidence_from_reasoning(reasoning_steps) + + result = { + "answer": final_answer, + "confidence": confidence + } + + if config['showReasoning']: + result["reasoning"] = reasoning_steps + + return result + `, +}; + +export const ragSystemTool = { + name: "rag_system", + type: "retrieval_augmented", + description: "Retrieval Augmented Generation with knowledge base", + icon: "📚", + color: "#F97316", // orange + category: "Reasoning Patterns", + + parameters: v.object({ + knowledgeBaseId: v.string(), + retrievalStrategy: v.union(v.literal("semantic"), v.literal("keyword"), v.literal("hybrid")), + topK: v.number(), + rerank: v.boolean(), + citeSources: v.boolean(), + }), + + inputs: ["query"], + outputs: ["response", "sources", "relevance_scores"], + + strandsAgentCode: ` +@tool +def rag_generation(query: str, config: dict) -> dict: + kb = KnowledgeBase(config['knowledgeBaseId']) + + # Retrieve relevant documents + if config['retrievalStrategy'] == 'semantic': + docs = kb.semantic_search(query, top_k=config['topK']) + elif config['retrievalStrategy'] == 'keyword': + docs = kb.keyword_search(query, top_k=config['topK']) + else: + docs = kb.hybrid_search(query, top_k=config['topK']) + + # Rerank if enabled + if config['rerank']: + docs = rerank_documents(docs, query) + + # Build augmented prompt + context = "\\n\\n".join([doc['content'] for doc in docs]) + prompt = f"""Based on the following information, answer the query. + +Context: +{context} + +Query: {query} + +{"Please cite your sources." if config['citeSources'] else ""} + +Answer:""" + + response = invoke_model(prompt) + + return { + "response": response, + "sources": [{"id": doc['id'], "title": doc['title'], "relevance": doc['score']} for doc in docs], + "relevance_scores": [doc['score'] for doc in docs] + } + `, +}; + +export const reactLoopTool = { + name: "react_loop", + type: "reasoning_action", + description: "Reasoning + Acting loop for complex tasks", + icon: "🔄", + color: "#06B6D4", // cyan + category: "Reasoning Patterns", + + parameters: v.object({ + maxIterations: v.number(), + tools: v.array(v.string()), + requireConfidence: v.number(), // minimum confidence to stop + enableReflection: v.boolean(), + }), + + inputs: ["goal"], + outputs: ["result", "action_history", "reasoning_history"], + + strandsAgentCode: ` +@tool +def react_loop(goal: str, config: dict) -> dict: + action_history = [] + reasoning_history = [] + current_state = {"goal": goal, "progress": []} + + for iteration in range(config['maxIterations']): + # Reason about current state + reasoning_prompt = f"""Goal: {goal} + +Current state: {current_state} + +What should we do next? Reason step by step.""" + + reasoning = invoke_model(reasoning_prompt) + reasoning_history.append(reasoning) + + # Decide action + action = extract_action(reasoning) + + # Execute action + tool = get_tool(action['tool_name']) + result = tool.execute(action['parameters']) + + action_history.append({ + "iteration": iteration, + "action": action, + "result": result + }) + + # Update state + current_state['progress'].append(result) + + # Check if goal achieved + confidence = evaluate_goal_completion(goal, current_state) + if confidence >= config['requireConfidence']: + break + + # Reflection (if enabled) + if config['enableReflection']: + reflection = reflect_on_progress(action_history, goal) + current_state['reflection'] = reflection + + return { + "result": current_state, + "action_history": action_history, + "reasoning_history": reasoning_history + } + `, +}; + +/** + * HUMAN-IN-THE-LOOP TOOLS + */ + +export const handoffToUserTool = { + name: "handoff_to_user", + type: "human_interaction", + description: "Hand off control to human for input or decision", + icon: "👤", + color: "#EF4444", // red + category: "Human-in-the-Loop", + + parameters: v.object({ + question: v.string(), + options: v.optional(v.array(v.string())), + requireConfirmation: v.boolean(), + timeout: v.optional(v.number()), // seconds to wait + context: v.optional(v.any()), + }), + + inputs: ["current_state"], + outputs: ["user_response", "timestamp"], + + strandsAgentCode: ` +@tool +def handoff_to_user(current_state: dict, config: dict) -> dict: + # Build handoff message + message = { + "type": "human_input_required", + "question": config['question'], + "context": config.get('context', current_state), + "timestamp": get_timestamp() + } + + if config.get('options'): + message['options'] = config['options'] + + # Store pending handoff in database + handoff_id = store_handoff(message) + + # Wait for user response (with timeout) + timeout = config.get('timeout', 300) # default 5 min + user_response = wait_for_user_input(handoff_id, timeout) + + # Require confirmation if needed + if config['requireConfirmation']: + confirmation = confirm_with_user(user_response) + if not confirmation: + # Re-prompt + return handoff_to_user(current_state, config) + + return { + "user_response": user_response, + "timestamp": get_timestamp(), + "handoff_id": handoff_id + } + `, +}; + +/** + * MEMORY TOOLS + */ + +export const shortTermMemoryTool = { + name: "short_term_memory", + type: "memory_storage", + description: "Store and retrieve short-term conversation memory", + icon: "💭", + color: "#8B5CF6", // purple + category: "Memory Tools", + + parameters: v.object({ + maxItems: v.number(), + ttl: v.number(), // time to live in seconds + compressionThreshold: v.number(), + }), + + inputs: ["operation", "key", "value"], + outputs: ["result", "memory_state"], + + strandsAgentCode: ` +@tool +def short_term_memory(operation: str, key: str, value: any, config: dict) -> dict: + memory = ShortTermMemory( + max_items=config['maxItems'], + ttl=config['ttl'] + ) + + if operation == "store": + memory.store(key, value) + + # Compress if threshold reached + if memory.size() > config['compressionThreshold']: + memory.compress() + + return {"result": "stored", "memory_state": memory.get_state()} + + elif operation == "retrieve": + result = memory.retrieve(key) + return {"result": result, "memory_state": memory.get_state()} + + elif operation == "search": + results = memory.search(value) # value is search query + return {"result": results, "memory_state": memory.get_state()} + + elif operation == "clear": + memory.clear() + return {"result": "cleared", "memory_state": memory.get_state()} + `, +}; + +export const longTermMemoryTool = { + name: "long_term_memory", + type: "memory_storage", + description: "Store and retrieve long-term persistent memory", + icon: "🧠", + color: "#A855F7", // violet + category: "Memory Tools", + + parameters: v.object({ + storageBackend: v.union(v.literal("convex"), v.literal("s3"), v.literal("vector_db")), + indexingStrategy: v.union(v.literal("semantic"), v.literal("keyword"), v.literal("hybrid")), + enableVersioning: v.boolean(), + }), + + inputs: ["operation", "key", "value", "metadata"], + outputs: ["result", "version"], + + strandsAgentCode: ` +@tool +def long_term_memory(operation: str, key: str, value: any, metadata: dict, config: dict) -> dict: + memory = LongTermMemory( + backend=config['storageBackend'], + indexing=config['indexingStrategy'] + ) + + if operation == "store": + version = memory.store(key, value, metadata) + + # Create semantic index + if config['indexingStrategy'] in ['semantic', 'hybrid']: + memory.create_embedding(key, value) + + return {"result": "stored", "version": version} + + elif operation == "retrieve": + result = memory.retrieve(key) + + # Get version history if enabled + if config['enableVersioning']: + history = memory.get_versions(key) + return {"result": result, "version": history[-1], "history": history} + + return {"result": result} + + elif operation == "search": + if config['indexingStrategy'] == 'semantic': + results = memory.semantic_search(value, top_k=10) + elif config['indexingStrategy'] == 'keyword': + results = memory.keyword_search(value) + else: + results = memory.hybrid_search(value) + + return {"result": results} + + elif operation == "delete": + memory.delete(key) + return {"result": "deleted"} + `, +}; + +export const semanticMemoryTool = { + name: "semantic_memory", + type: "memory_storage", + description: "Semantic search over memory with embeddings", + icon: "🔍", + color: "#EC4899", // pink + category: "Memory Tools", + + parameters: v.object({ + embeddingModel: v.string(), // "text-embedding-3-small", "amazon.titan-embed-text-v1" + vectorDimensions: v.number(), + similarityThreshold: v.number(), + rerankResults: v.boolean(), + }), + + inputs: ["query", "filters"], + outputs: ["results", "relevance_scores"], + + strandsAgentCode: ` +@tool +def semantic_memory_search(query: str, filters: dict, config: dict) -> dict: + # Generate query embedding + embedder = get_embedding_model(config['embeddingModel']) + query_embedding = embedder.embed(query) + + # Search vector store + vector_store = VectorMemory(dimensions=config['vectorDimensions']) + results = vector_store.similarity_search( + query_embedding, + filters=filters, + threshold=config['similarityThreshold'] + ) + + # Rerank if enabled + if config['rerankResults']: + reranker = get_reranker() + results = reranker.rerank(query, results) + + return { + "results": [r['content'] for r in results], + "relevance_scores": [r['score'] for r in results], + "metadata": [r['metadata'] for r in results] + } + `, +}; + +/** + * ADVANCED REASONING PATTERN TOOLS + */ + +export const selfConsistencyTool = { + name: "self_consistency", + type: "voting_mechanism", + description: "Multi-path reasoning with voting for consistency", + icon: "✅", + color: "#10B981", // green + category: "Reasoning Patterns", + + parameters: v.object({ + numPaths: v.number(), // how many reasoning paths + votingStrategy: v.union(v.literal("majority"), v.literal("weighted"), v.literal("consensus")), + diversityPenalty: v.number(), // penalize similar paths + requireAgreement: v.number(), // minimum agreement threshold + }), + + inputs: ["problem"], + outputs: ["final_answer", "confidence", "reasoning_paths", "vote_distribution"], + + strandsAgentCode: ` +@tool +def self_consistency_reasoning(problem: str, config: dict) -> dict: + reasoning_paths = [] + answers = [] + + # Generate multiple reasoning paths + for i in range(config['numPaths']): + # Use temperature sampling for diversity + path = invoke_model( + f"Solve this problem with reasoning:\\n{problem}", + temperature=0.7 + (i * 0.1) # increase diversity + ) + + reasoning_paths.append(path) + answer = extract_answer(path) + answers.append(answer) + + # Apply diversity penalty + if config['diversityPenalty'] > 0: + answers = penalize_duplicates(answers, config['diversityPenalty']) + + # Vote on answers + if config['votingStrategy'] == 'majority': + final_answer, votes = majority_vote(answers) + elif config['votingStrategy'] == 'weighted': + # Weight by reasoning quality + weights = [score_reasoning_quality(path) for path in reasoning_paths] + final_answer, votes = weighted_vote(answers, weights) + else: # consensus + final_answer, votes = consensus_vote(answers, reasoning_paths) + + # Calculate confidence + agreement = votes[final_answer] / len(answers) + + if agreement < config['requireAgreement']: + # Not enough agreement, generate more paths + return self_consistency_reasoning(problem, { + **config, + 'numPaths': config['numPaths'] + 2 + }) + + return { + "final_answer": final_answer, + "confidence": agreement, + "reasoning_paths": reasoning_paths, + "vote_distribution": votes + } + `, +}; + +export const treeOfThoughtsTool = { + name: "tree_of_thoughts", + type: "branching_reasoning", + description: "Explore multiple reasoning branches like a tree", + icon: "🌳", + color: "#F59E0B", // amber + category: "Reasoning Patterns", + + parameters: v.object({ + maxDepth: v.number(), + branchingFactor: v.number(), // thoughts per level + evaluationStrategy: v.union(v.literal("value"), v.literal("vote"), v.literal("hybrid")), + pruningThreshold: v.number(), // prune bad branches + explorationBonus: v.number(), // encourage exploration + }), + + inputs: ["problem"], + outputs: ["best_path", "confidence", "tree_structure"], + + strandsAgentCode: ` +@tool +def tree_of_thoughts(problem: str, config: dict) -> dict: + # Initialize tree root + root = ThoughtNode( + content=problem, + depth=0, + value=0 + ) + + # Build tree with BFS/DFS + frontier = [root] + explored = [] + + while frontier and len(explored) < config['maxDepth']: + node = frontier.pop(0) + + # Generate child thoughts + children = [] + for i in range(config['branchingFactor']): + thought_prompt = f"""Given this problem and current reasoning: + +Problem: {problem} +Current thought: {node.content} + +What's the next step? Generate thought {i+1}:""" + + child_thought = invoke_model(thought_prompt) + + # Evaluate thought + if config['evaluationStrategy'] == 'value': + value = evaluate_thought_value(child_thought, problem) + elif config['evaluationStrategy'] == 'vote': + value = vote_on_thought(child_thought, problem) + else: + value = hybrid_evaluation(child_thought, problem) + + # Add exploration bonus + value += config['explorationBonus'] * (1 / (node.depth + 1)) + + child = ThoughtNode( + content=child_thought, + depth=node.depth + 1, + value=value, + parent=node + ) + children.append(child) + + # Prune low-value branches + children = [c for c in children if c.value >= config['pruningThreshold']] + + # Add to frontier (sorted by value) + frontier.extend(sorted(children, key=lambda x: x.value, reverse=True)) + explored.append(node) + + # Find best path + leaf_nodes = [n for n in explored if n.depth == config['maxDepth'] or not n.children] + best_leaf = max(leaf_nodes, key=lambda x: x.value) + + # Trace path back to root + best_path = [] + current = best_leaf + while current: + best_path.insert(0, current.content) + current = current.parent + + return { + "best_path": best_path, + "confidence": best_leaf.value, + "tree_structure": serialize_tree(root) + } + `, +}; + +export const reflexionTool = { + name: "reflexion", + type: "self_improvement", + description: "Self-reflection and iterative improvement", + icon: "🪞", + color: "#06B6D4", // cyan + category: "Reasoning Patterns", + + parameters: v.object({ + maxIterations: v.number(), + improvementThreshold: v.number(), // stop if improvement < threshold + critiqueLevels: v.array(v.string()), // ["logic", "evidence", "clarity"] + enableMemory: v.boolean(), // remember past mistakes + }), + + inputs: ["task"], + outputs: ["final_result", "iteration_history", "improvements"], + + strandsAgentCode: ` +@tool +def reflexion_improvement(task: str, config: dict) -> dict: + iteration_history = [] + current_solution = None + memory = ReflexionMemory() if config['enableMemory'] else None + + for iteration in range(config['maxIterations']): + # Generate solution + if current_solution is None: + solution = invoke_model(f"Solve this task:\\n{task}") + else: + # Use reflection to improve + solution = invoke_model(f"""Previous solution:\\n{current_solution} + +Critiques:\\n{critique} + +Improve the solution:""") + + # Self-critique on multiple levels + critiques = {} + for level in config['critiqueLevels']: + critique_prompt = f"""Critique this solution on {level}: + +Task: {task} +Solution: {solution} + +Provide specific critiques:""" + + critiques[level] = invoke_model(critique_prompt) + + # Calculate improvement score + if current_solution: + improvement = calculate_improvement(current_solution, solution, critiques) + + if improvement < config['improvementThreshold']: + break # converged + + # Store in memory + if memory: + memory.store_attempt(solution, critiques) + + iteration_history.append({ + "iteration": iteration, + "solution": solution, + "critiques": critiques + }) + + current_solution = solution + critique = "\\n".join(critiques.values()) + + # Extract lessons learned + improvements = [] + if memory: + improvements = memory.extract_lessons() + + return { + "final_result": current_solution, + "iteration_history": iteration_history, + "improvements": improvements + } + `, +}; + +export const mapReduceTool = { + name: "map_reduce", + type: "parallel_processing", + description: "Parallel processing with aggregation (Map-Reduce)", + icon: "🗺️", + color: "#14B8A6", // teal + category: "Reasoning Patterns", + + parameters: v.object({ + chunkSize: v.number(), + mapPrompt: v.string(), // prompt for map phase + reducePrompt: v.string(), // prompt for reduce phase + parallelism: v.number(), // max parallel tasks + aggregationStrategy: v.union(v.literal("concatenate"), v.literal("summarize"), v.literal("vote")), + }), + + inputs: ["data"], + outputs: ["result", "intermediate_results"], + + strandsAgentCode: ` +@tool +def map_reduce_processing(data: list, config: dict) -> dict: + # MAP PHASE: Split data into chunks + chunks = split_into_chunks(data, config['chunkSize']) + + # Process chunks in parallel + map_results = [] + for i in range(0, len(chunks), config['parallelism']): + batch = chunks[i:i + config['parallelism']] + + # Parallel invocation + batch_results = parallel_invoke([ + f"{config['mapPrompt']}\\n\\nData: {chunk}" + for chunk in batch + ]) + + map_results.extend(batch_results) + + # REDUCE PHASE: Aggregate results + if config['aggregationStrategy'] == 'concatenate': + final_result = "\\n\\n".join(map_results) + + elif config['aggregationStrategy'] == 'summarize': + # Hierarchical reduction if too many results + while len(map_results) > 1: + reduced = [] + for i in range(0, len(map_results), 2): + pair = map_results[i:i+2] + summary = invoke_model(f"{config['reducePrompt']}\\n\\n{pair}") + reduced.append(summary) + map_results = reduced + + final_result = map_results[0] + + else: # vote + final_result = majority_vote(map_results) + + return { + "result": final_result, + "intermediate_results": map_results + } + `, +}; + +export const parallelPromptsTool = { + name: "parallel_prompts", + type: "async_processing", + description: "Execute multiple prompts in parallel for speed", + icon: "⚡", + color: "#F97316", // orange + category: "Prompt Tools", + + parameters: v.object({ + prompts: v.array(v.object({ + id: v.string(), + template: v.string(), + priority: v.number(), + })), + maxParallelism: v.number(), + timeoutMs: v.number(), + failureStrategy: v.union(v.literal("skip"), v.literal("retry"), v.literal("fallback")), + }), + + inputs: ["input_data"], + outputs: ["results", "timings", "failures"], + + strandsAgentCode: ` +@tool +def parallel_prompts_execution(input_data: dict, config: dict) -> dict: + prompts = config['prompts'] + + # Sort by priority + prompts = sorted(prompts, key=lambda p: p['priority'], reverse=True) + + # Execute in batches + results = {} + timings = {} + failures = [] + + for i in range(0, len(prompts), config['maxParallelism']): + batch = prompts[i:i + config['maxParallelism']] + + # Render prompts with input data + rendered = [ + render_template(p['template'], input_data) + for p in batch + ] + + # Parallel execution with timeout + start_time = time.now() + batch_results = parallel_invoke_with_timeout( + rendered, + timeout_ms=config['timeoutMs'] + ) + end_time = time.now() + + # Handle results and failures + for prompt, result in zip(batch, batch_results): + if result.is_error(): + failures.append(prompt['id']) + + if config['failureStrategy'] == 'retry': + retry_result = invoke_model(rendered[batch.index(prompt)]) + if not retry_result.is_error(): + results[prompt['id']] = retry_result + elif config['failureStrategy'] == 'fallback': + results[prompt['id']] = get_fallback_value(prompt) + # skip = don't add to results + else: + results[prompt['id']] = result + + timings[prompt['id']] = end_time - start_time + + return { + "results": results, + "timings": timings, + "failures": failures + } + `, +}; + +/** + * Export all tools as registry + */ +export const STRANDS_TOOLS_REGISTRY = { + // Chatbot Components + conversational_chatbot: conversationalChatbotTool, + chatbot_worker: chatbotWorkerTool, + chatbot_memory: chatbotMemoryTool, + + // Prompt Tools + prompt_template: promptTemplateTool, + prompt_chain: promptChainTool, + thought_builder: thoughtBuilderTool, + parallel_prompts: parallelPromptsTool, + + // Connectors + ml_connector: mlConnectorTool, + data_connector: dataConnectorTool, + + // Reasoning Patterns + chain_of_thought: chainOfThoughtTool, + rag_system: ragSystemTool, + react_loop: reactLoopTool, + self_consistency: selfConsistencyTool, + tree_of_thoughts: treeOfThoughtsTool, + reflexion: reflexionTool, + map_reduce: mapReduceTool, + + // Human-in-the-Loop + handoff_to_user: handoffToUserTool, + + // Memory Tools + short_term_memory: shortTermMemoryTool, + long_term_memory: longTermMemoryTool, + semantic_memory: semanticMemoryTool, +}; + +/** + * Tool categories for sidebar + */ +export const TOOL_CATEGORIES = [ + { + name: "Chatbot Components", + icon: "💬", + tools: ["conversational_chatbot", "chatbot_worker", "chatbot_memory"], + }, + { + name: "Prompt Tools", + icon: "📝", + tools: ["prompt_template", "prompt_chain", "thought_builder", "parallel_prompts"], + }, + { + name: "Connectors", + icon: "🔌", + tools: ["ml_connector", "data_connector"], + }, + { + name: "Reasoning Patterns", + icon: "🧠", + tools: ["chain_of_thought", "rag_system", "react_loop", "self_consistency", "tree_of_thoughts", "reflexion", "map_reduce"], + }, + { + name: "Human-in-the-Loop", + icon: "👤", + tools: ["handoff_to_user"], + }, + { + name: "Memory Tools", + icon: "💭", + tools: ["short_term_memory", "long_term_memory", "semantic_memory"], + }, +]; diff --git a/convex/stripe.ts b/convex/stripe.ts new file mode 100644 index 0000000..0d6ff14 --- /dev/null +++ b/convex/stripe.ts @@ -0,0 +1,208 @@ +"use node"; + +/** + * Stripe Actions - Client-callable and internal actions that need Node.js runtime. + * + * "use node" files can ONLY contain action / internalAction exports. + * Mutations and queries live in stripeMutations.ts (standard Convex runtime). + * + * Actions (client-callable, authenticated): + * - createCheckoutSession: Redirects user to Stripe Checkout for $5/mo subscription + * - createPortalSession: Redirects user to Stripe Customer Portal for self-service + * + * Internal actions (NOT client-callable): + * - reportUsage: Reports metered execution overage to Stripe + */ + +import { action, internalAction } from "./_generated/server"; +import { internal } from "./_generated/api"; +import { v } from "convex/values"; +import { getAuthUserId } from "@convex-dev/auth/server"; + +// Mutations live in stripeMutations.ts (non-Node runtime). +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const internalStripeMutations = (internal as any).stripeMutations; + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +// Stripe type alias - resolved dynamically via import("stripe") +type StripeClient = import("stripe").default; + +async function getStripeClient(): Promise { + const { default: StripeSDK } = await import( "stripe" ); + const secretKey = process.env.STRIPE_SECRET_KEY; + if ( !secretKey ) { + throw new Error( + "Missing STRIPE_SECRET_KEY environment variable. " + + "Add it to the Convex dashboard under Settings > Environment Variables." + ); + } + return new StripeSDK( secretKey ); +} + +/** + * Resolve the frontend URL for Stripe redirect callbacks. + * Reads FRONTEND_URL first (explicit override), then falls back to SITE_URL + * or CONVEX_SITE_URL. + */ +function getFrontendUrl(): string { + return ( + process.env.FRONTEND_URL ?? + process.env.SITE_URL ?? + process.env.CONVEX_SITE_URL ?? + "http://localhost:4000" + ); +} + +// ─── Actions (Client-callable, Authenticated) ─────────────────────────────── + +/** + * Create a Stripe Checkout session for the Personal tier ($5/mo + metered overage). + * Returns the checkout URL for client-side redirect. + */ +export const createCheckoutSession = action( { + args: {}, + handler: async ( ctx ): Promise<{ url: string }> => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + throw new Error( "Authentication required to subscribe" ); + } + + const user = await ctx.runQuery( internal.users.getInternal, { id: userId } ); + if ( !user ) { + throw new Error( "User not found" ); + } + + const stripe = await getStripeClient(); + + const personalPriceId = process.env.STRIPE_PERSONAL_PRICE; + const meteredPriceId = process.env.STRIPE_METERED_PRICE; + if ( !personalPriceId || !meteredPriceId ) { + throw new Error( + "Missing STRIPE_PERSONAL_PRICE or STRIPE_METERED_PRICE. " + + "Create these prices in Stripe Dashboard and add the IDs to Convex env vars." + ); + } + + // Reuse existing Stripe customer or create new one + let customerId = user.stripeCustomerId; + if ( !customerId ) { + const customer = await stripe.customers.create( { + email: user.email ?? undefined, + name: user.name ?? undefined, + metadata: { convexUserId: userId }, + } ); + customerId = customer.id; + + // Persist the customer ID immediately (mutation in stripeMutations.ts) + await ctx.runMutation( internalStripeMutations.setStripeCustomerId, { + userId, + customerId, + } ); + } + + const frontendUrl = getFrontendUrl(); + + const session = await stripe.checkout.sessions.create( { + customer: customerId, + mode: "subscription", + line_items: [ + { price: personalPriceId, quantity: 1 }, // Flat $5/mo + { price: meteredPriceId }, // Metered overage + ], + success_url: `${frontendUrl}?view=settings&checkout=success`, + cancel_url: `${frontendUrl}?view=settings&checkout=canceled`, + metadata: { convexUserId: userId, tier: "personal" }, + } ); + + if ( !session.url ) { + throw new Error( "Stripe did not return a checkout URL" ); + } + + return { url: session.url }; + }, +} ); + +/** + * Create a Stripe Customer Portal session for self-service subscription management. + * Returns the portal URL for client-side redirect. + */ +export const createPortalSession = action( { + args: {}, + handler: async ( ctx ): Promise<{ url: string }> => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + throw new Error( "Authentication required" ); + } + + const user = await ctx.runQuery( internal.users.getInternal, { id: userId } ); + if ( !user?.stripeCustomerId ) { + throw new Error( "No active subscription found. Subscribe first." ); + } + + const stripe = await getStripeClient(); + + const frontendUrl = getFrontendUrl(); + + const session = await stripe.billingPortal.sessions.create( { + customer: user.stripeCustomerId, + return_url: `${frontendUrl}?view=settings`, + } ); + + return { url: session.url }; + }, +} ); + +/** + * Report metered usage to Stripe for overage billing. + * Called internally after cloud executions that exceed the included 100/month. + * + * Looks up STRIPE_METERED_PRICE → retrieves the connected Billing Meter → + * sends a meter event with the customer ID and quantity. No extra env vars + * needed — the meter event name is derived from the price at runtime. + * + * NOTE: internalAction - NOT client-callable. Only invoked by backend after + * cloud executions via ctx.scheduler.runAfter(). + */ +export const reportUsage = internalAction( { + args: { + stripeCustomerId: v.string(), + quantity: v.number(), + }, + handler: async ( _ctx, args ) => { + if ( args.quantity <= 0 ) { + throw new Error( "Quantity must be positive" ); + } + + const stripe = await getStripeClient(); + + const meteredPriceId = process.env.STRIPE_METERED_PRICE; + if ( !meteredPriceId ) { + console.warn( "STRIPE_METERED_PRICE not set; skipping overage report" ); + return; + } + + // Look up the price to find its connected Billing Meter + const price = await stripe.prices.retrieve( meteredPriceId ); + const meterId = price.recurring?.meter; + if ( !meterId ) { + console.warn( + `Price ${meteredPriceId} has no connected Billing Meter. ` + + "Create a meter in Stripe Dashboard → Billing → Meters and link it to this price." + ); + return; + } + + // Retrieve the meter to get its event_name + const meter = await stripe.billing.meters.retrieve( meterId ); + + await stripe.billing.meterEvents.create( { + event_name: meter.event_name, + payload: { + stripe_customer_id: args.stripeCustomerId, + value: String( args.quantity ), + }, + timestamp: Math.floor( Date.now() / 1000 ), + } ); + }, +} ); diff --git a/convex/stripeMutations.ts b/convex/stripeMutations.ts new file mode 100644 index 0000000..28cd672 --- /dev/null +++ b/convex/stripeMutations.ts @@ -0,0 +1,360 @@ +/** + * Stripe Internal Mutations & Queries + * + * Separated from stripe.ts because that file uses "use node" (Node.js runtime), + * which only permits action/internalAction exports. Mutations and queries must + * live in a standard Convex runtime file. + * + * Internal mutations (NOT client-callable): + * - incrementUsageAndReportOverage: Single source of truth for usage increment + Stripe overage + * - setStripeCustomerId: Persists Stripe customer ID on user record + * - updateSubscription: Sets tier/role/status on checkout or renewal + * - cancelSubscription: Downgrades user on cancellation + * - resetMonthlyUsage: Zeros executionsThisMonth on invoice.paid + * - markPastDue: Marks subscription as past_due on payment failure + * + * Query (client-callable): + * - getSubscriptionStatus: Returns current tier, status, usage, period end + */ + +import { internalMutation, query } from "./_generated/server"; +import { internal } from "./_generated/api"; +import { v } from "convex/values"; +import { getAuthUserId } from "@convex-dev/auth/server"; + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +const internalStripe = ( internal as any ).stripe; + +// ─── Internal Mutations (NOT client-callable) ──────────────────────────────── + +/** + * Core logic: Increment executionsThisMonth and report Stripe overage. + * + * Exported as a plain function so mutation handlers can call it directly + * (mutations cannot call ctx.runMutation). Action handlers should use the + * internalMutation wrapper below instead. + * + * This is the SINGLE source of truth for usage increment + overage reporting. + */ +export async function incrementUsageAndReportOverageImpl( + ctx: { db: any; scheduler: any }, + userId: any, + options?: { + updateLastTestAt?: boolean; + modelId?: string; + inputTokens?: number; + outputTokens?: number; + }, +) { + const user = await ctx.db.get( userId ); + if ( !user ) { + console.warn( `incrementUsageAndReportOverage: user not found for id ${userId}` ); + return; + } + + // Token-based billing: use actual token counts when available (2x AWS cost at $0.05/unit). + // Falls back to flat per-call units when token counts are not provided. + let units: number; + const hasTokens = + options?.inputTokens !== undefined && + options?.outputTokens !== undefined && + ( ( options.inputTokens || 0 ) > 0 || ( options.outputTokens || 0 ) > 0 ); + + if ( hasTokens ) { + const { calculateUnitsFromTokens } = await import( "./lib/tokenBilling" ); + units = calculateUnitsFromTokens( + options!.modelId || "anthropic.claude-haiku-4-5-20251001-v1:0", + options!.inputTokens!, + options!.outputTokens!, + ); + } else { + // Flat fallback: look up unitsPerCall from model registry + const { getUnitsForModel } = await import( "./modelRegistry" ); + units = options?.modelId ? getUnitsForModel( options.modelId ) : 1; + } + + const prevUnits = user.executionsThisMonth || 0; + const newCount = prevUnits + units; + const newRawCalls = ( user.rawCallsThisMonth || 0 ) + 1; + + // Accumulate token totals for analytics + const newInputTokens = ( user.tokensInputThisMonth || 0 ) + ( options?.inputTokens || 0 ); + const newOutputTokens = ( user.tokensOutputThisMonth || 0 ) + ( options?.outputTokens || 0 ); + + const patch: Record = { + executionsThisMonth: newCount, + rawCallsThisMonth: newRawCalls, + tokensInputThisMonth: newInputTokens, + tokensOutputThisMonth: newOutputTokens, + }; + if ( options?.updateLastTestAt ) { + patch.lastTestAt = Date.now(); + } + await ctx.db.patch( userId, patch ); + + // Report overage to Stripe for personal tier users past included limit. + // Only report the units that crossed the threshold (not units already reported). + if ( user.tier === "personal" && user.stripeCustomerId ) { + const { getTierConfig } = await import( "./lib/tierConfig" ); + const tierCfg = getTierConfig( "personal" ); + if ( newCount > tierCfg.monthlyExecutions ) { + // How many units of THIS call are overage? + const overageUnits = Math.min( units, newCount - tierCfg.monthlyExecutions ); + await ctx.scheduler.runAfter( 0, internalStripe.reportUsage, { + stripeCustomerId: user.stripeCustomerId, + quantity: overageUnits, + } ); + } + } +} + +/** + * InternalMutation wrapper — for callers in actions (which use ctx.runMutation). + * Mutation callers should import and call incrementUsageAndReportOverageImpl directly. + */ +export const incrementUsageAndReportOverage = internalMutation( { + args: { + userId: v.id( "users" ), + updateLastTestAt: v.optional( v.boolean() ), + modelId: v.optional( v.string() ), + inputTokens: v.optional( v.number() ), + outputTokens: v.optional( v.number() ), + }, + handler: async ( ctx, args ) => { + await incrementUsageAndReportOverageImpl( ctx, args.userId, { + updateLastTestAt: args.updateLastTestAt, + modelId: args.modelId, + inputTokens: args.inputTokens, + outputTokens: args.outputTokens, + } ); + }, +} ); + +// ─── Webhook Mutations (NOT client-callable) ───────────────────────────────── + +/** + * Persist Stripe customer ID on the user record. + */ +export const setStripeCustomerId = internalMutation( { + args: { + userId: v.id( "users" ), + customerId: v.string(), + }, + handler: async ( ctx, args ) => { + await ctx.db.patch( args.userId, { + stripeCustomerId: args.customerId, + } ); + }, +} ); + +/** + * Update subscription state after checkout.session.completed or subscription.updated. + * Sets tier to "personal", role to "paid", and marks subscription as active. + */ +export const updateSubscription = internalMutation( { + args: { + stripeCustomerId: v.string(), + subscriptionId: v.string(), + status: v.string(), + currentPeriodEnd: v.number(), + }, + handler: async ( ctx, args ) => { + // Find user by Stripe customer ID + const user = await ctx.db + .query( "users" ) + .withIndex( "by_stripe_customer_id", ( q ) => + q.eq( "stripeCustomerId", args.stripeCustomerId ) + ) + .first(); + + if ( !user ) { + console.error( `Stripe webhook: No user found for customer ${args.stripeCustomerId}` ); + return; + } + + await ctx.db.patch( user._id, { + stripeSubscriptionId: args.subscriptionId, + subscriptionStatus: args.status, + currentPeriodEnd: args.currentPeriodEnd, + tier: "personal", + role: "paid", + upgradedAt: Date.now(), + } ); + }, +} ); + +/** + * Handle subscription cancellation. Downgrades user to freemium. + */ +export const cancelSubscription = internalMutation( { + args: { + stripeCustomerId: v.string(), + }, + handler: async ( ctx, args ) => { + const user = await ctx.db + .query( "users" ) + .withIndex( "by_stripe_customer_id", ( q ) => + q.eq( "stripeCustomerId", args.stripeCustomerId ) + ) + .first(); + + if ( !user ) { + console.error( `Stripe webhook: No user found for customer ${args.stripeCustomerId}` ); + return; + } + + await ctx.db.patch( user._id, { + subscriptionStatus: "canceled", + tier: "freemium", + role: "user", + } ); + }, +} ); + +/** + * Reset monthly execution counter at the start of each billing period (invoice.paid). + */ +export const resetMonthlyUsage = internalMutation( { + args: { + stripeCustomerId: v.string(), + periodStart: v.number(), + }, + handler: async ( ctx, args ) => { + const user = await ctx.db + .query( "users" ) + .withIndex( "by_stripe_customer_id", ( q ) => + q.eq( "stripeCustomerId", args.stripeCustomerId ) + ) + .first(); + + if ( !user ) { + console.error( `Stripe webhook: No user found for customer ${args.stripeCustomerId}` ); + return; + } + + await ctx.db.patch( user._id, { + executionsThisMonth: 0, + rawCallsThisMonth: 0, + tokensInputThisMonth: 0, + tokensOutputThisMonth: 0, + billingPeriodStart: args.periodStart, + } ); + }, +} ); + +/** + * Mark subscription as past_due when payment fails. + */ +export const markPastDue = internalMutation( { + args: { + stripeCustomerId: v.string(), + }, + handler: async ( ctx, args ) => { + const user = await ctx.db + .query( "users" ) + .withIndex( "by_stripe_customer_id", ( q ) => + q.eq( "stripeCustomerId", args.stripeCustomerId ) + ) + .first(); + + if ( !user ) { + console.error( `Stripe webhook: No user found for customer ${args.stripeCustomerId}` ); + return; + } + + await ctx.db.patch( user._id, { + subscriptionStatus: "past_due", + } ); + }, +} ); + +/** + * Restrict account when a charge dispute (chargeback) is created. + * Sets subscriptionStatus to "disputed" which bedrockGate blocks. + */ +export const restrictAccountForDispute = internalMutation( { + args: { + stripeCustomerId: v.string(), + }, + handler: async ( ctx, args ) => { + const user = await ctx.db + .query( "users" ) + .withIndex( "by_stripe_customer_id", ( q ) => + q.eq( "stripeCustomerId", args.stripeCustomerId ) + ) + .first(); + + if ( !user ) { + console.error( `Stripe webhook: No user found for customer ${args.stripeCustomerId}` ); + return; + } + + await ctx.db.patch( user._id, { + subscriptionStatus: "disputed", + } ); + }, +} ); + +/** + * Handle charge refund — mark subscription status so the gate can act on it. + * We don't downgrade immediately (the subscription may still be active), + * but we log it for monitoring. If the refund leads to a cancellation, + * the customer.subscription.deleted event will handle the downgrade. + */ +export const handleChargeRefund = internalMutation( { + args: { + stripeCustomerId: v.string(), + amountRefunded: v.number(), + }, + handler: async ( ctx, args ) => { + const user = await ctx.db + .query( "users" ) + .withIndex( "by_stripe_customer_id", ( q ) => + q.eq( "stripeCustomerId", args.stripeCustomerId ) + ) + .first(); + + if ( !user ) { + console.error( `Stripe webhook: No user found for customer ${args.stripeCustomerId}` ); + return; + } + + // Log refund but don't change tier — the subscription lifecycle events handle that. + // If this is a full refund, Stripe will likely also fire subscription.deleted. + console.warn( + `Stripe refund processed for customer ${args.stripeCustomerId}: $${( args.amountRefunded / 100 ).toFixed( 2 )}` + ); + }, +} ); + +// ─── Query (Client-callable) ───────────────────────────────────────────────── + +/** + * Get the current user's subscription status and usage. + */ +export const getSubscriptionStatus = query( { + args: {}, + handler: async ( ctx ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + return null; + } + + const user = await ctx.db.get( userId ); + if ( !user ) { + return null; + } + + return { + tier: user.tier ?? "freemium", + role: user.role ?? "user", + subscriptionStatus: user.subscriptionStatus ?? null, + executionsThisMonth: user.executionsThisMonth ?? 0, + rawCallsThisMonth: user.rawCallsThisMonth ?? 0, + tokensInputThisMonth: user.tokensInputThisMonth ?? 0, + tokensOutputThisMonth: user.tokensOutputThisMonth ?? 0, + currentPeriodEnd: user.currentPeriodEnd ?? null, + hasActiveSubscription: user.subscriptionStatus === "active", + }; + }, +} ); diff --git a/convex/swarmTestingOrchestrator.ts b/convex/swarmTestingOrchestrator.ts new file mode 100644 index 0000000..c8839de --- /dev/null +++ b/convex/swarmTestingOrchestrator.ts @@ -0,0 +1,990 @@ +/** + * Swarm Testing Orchestrator + * + * Comprehensive testing system for multi-agent swarms with: + * - 100% agent isolation (no shared state) + * - Individual agent addressing and switching + * - Swarm-level and individual agent communication + * - Local model detection and automated setup + * - Deployment option selection (Lambda vs Local) + * - Agent improvement switching workflow + */ + +import { action, query, internalAction, internalQuery } from "./_generated/server"; +import { v } from "convex/values"; +import { api, internal } from "./_generated/api"; +import type { Id } from "./_generated/dataModel"; + +interface SwarmDefinition { + id: string; + name: string; + orchestratorAgentId: Id<"agents">; + agentIds: Id<"agents">[]; + isolationLevel: "full"; // Always 100% separate + communicationProtocol: "broadcast" | "a2a" | "hierarchical"; + deploymentMode: "lambda" | "local"; + localModelProvider?: "ollama" | "llamacpp" | "lmstudio"; + localModelEndpoint?: string; + createdAt: number; + updatedAt: number; +} + +interface SwarmTestSession { + id: string; + swarmId: string; + testType: "individual" | "coordination" | "isolation" | "communication"; + status: "running" | "completed" | "failed"; + results: SwarmTestResult[]; + startedAt: number; + completedAt?: number; +} + +interface SwarmTestResult { + agentId: string; + agentName: string; + success: boolean; + response?: string; + executionTime: number; + error?: string; + isolationVerified?: boolean; + communicationLog?: string[]; +} + +interface AgentAddress { + swarmId: string; + agentId: Id<"agents">; + agentName: string; + role: "orchestrator" | "worker"; + status: "active" | "inactive" | "error"; + lastActivity: number; +} + +/** + * Create a new swarm with 100% agent isolation + */ +export const createSwarm = action({ + args: { + name: v.string(), + orchestratorAgentId: v.id("agents"), + agentIds: v.array(v.id("agents")), + communicationProtocol: v.optional(v.union(v.literal("broadcast"), v.literal("a2a"), v.literal("hierarchical"))), + deploymentMode: v.optional(v.union(v.literal("lambda"), v.literal("local"))), + localModelProvider: v.optional(v.union(v.literal("ollama"), v.literal("llamacpp"), v.literal("lmstudio"))), + }, + handler: async (ctx, args): Promise<{ success: boolean; swarmId: string; message: string }> => { + try { + // Validate agents exist and are accessible + const orchestrator = await ctx.runQuery(internal.agents.getInternal, { id: args.orchestratorAgentId }); + if (!orchestrator) { + throw new Error("Orchestrator agent not found"); + } + + const agents = await Promise.all( + args.agentIds.map(id => ctx.runQuery(internal.agents.getInternal, { id })) + ); + + const missingAgents = agents.filter((agent: any) => !agent); + if (missingAgents.length > 0) { + throw new Error(`Some agents not found: ${missingAgents.length} missing`); + } + + // Create swarm definition + const swarm: SwarmDefinition = { + id: `swarm-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, + name: args.name, + orchestratorAgentId: args.orchestratorAgentId, + agentIds: args.agentIds, + isolationLevel: "full", + communicationProtocol: args.communicationProtocol || "broadcast", + deploymentMode: args.deploymentMode || "lambda", + localModelProvider: args.localModelProvider, + createdAt: Date.now(), + updatedAt: Date.now(), + }; + + // Store swarm (in a real implementation, this would be in a database) + // For now, we'll use Convex mutations + await ctx.runAction(internal.swarmTestingOrchestrator.storeSwarm, { swarm }); + + return { + success: true, + swarmId: swarm.id, + message: `Swarm "${args.name}" created with ${args.agentIds.length} agents and 100% isolation` + }; + + } catch (error: any) { + return { + success: false, + swarmId: "", + message: `Failed to create swarm: ${error.message}` + }; + } + }, +}); + +/** + * Execute message to entire swarm (broadcast) + */ +export const sendMessageToSwarm = action({ + args: { + swarmId: v.string(), + message: v.string(), + messageType: v.optional(v.union(v.literal("command"), v.literal("query"), v.literal("notification"))), + excludeOrchestrator: v.optional(v.boolean()), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + responses: Record; + coordinationLog: string[]; + isolationStatus: Record; + }> => { + try { + // Get swarm definition + const swarm = await ctx.runQuery(internal.swarmTestingOrchestrator.getSwarm, { swarmId: args.swarmId }); + if (!swarm) { + throw new Error("Swarm not found"); + } + + const responses: Record = {}; + const coordinationLog: string[] = []; + const isolationStatus: Record = {}; + + // Determine target agents + const targetAgents = args.excludeOrchestrator + ? swarm.agentIds + : [swarm.orchestratorAgentId, ...swarm.agentIds]; + + coordinationLog.push(`📡 Broadcasting to ${targetAgents.length} agents in swarm "${swarm.name}"`); + + // Execute in parallel for true swarm behavior + const executionPromises = targetAgents.map(async (agentId: any) => { + try { + coordinationLog.push(`🤖 Executing agent ${agentId.slice(-8)}...`); + + const result = await ctx.runAction(internal.swarmTestingOrchestrator.executeAgentInIsolation, { + swarmId: args.swarmId, + agentId, + message: args.message, + messageType: args.messageType, + }); + + responses[agentId] = result; + isolationStatus[agentId] = result.isolationVerified || false; + + if (result.success) { + coordinationLog.push(`✅ Agent ${agentId.slice(-8)} completed (${result.executionTime}ms)`); + } else { + coordinationLog.push(`❌ Agent ${agentId.slice(-8)} failed: ${result.error}`); + } + + return result; + } catch (error: any) { + coordinationLog.push(`💥 Agent ${agentId.slice(-8)} crashed: ${error.message}`); + responses[agentId] = { success: false, error: error.message }; + isolationStatus[agentId] = false; + return { success: false, error: error.message }; + } + }); + + await Promise.all(executionPromises); + + return { + success: true, + responses, + coordinationLog, + isolationStatus, + }; + + } catch (error: any) { + return { + success: false, + responses: {}, + coordinationLog: [`💥 Swarm execution failed: ${error.message}`], + isolationStatus: {}, + }; + } + }, +}); + +/** + * Send message to specific agent in swarm + */ +export const sendMessageToAgent = action({ + args: { + swarmId: v.string(), + agentId: v.id("agents"), + message: v.string(), + messageType: v.optional(v.union(v.literal("command"), v.literal("query"), v.literal("notification"))), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + agentId: string; + response: any; + isolationVerified: boolean; + executionTime: number; + }> => { + try { + const result = await ctx.runAction(internal.swarmTestingOrchestrator.executeAgentInIsolation, { + swarmId: args.swarmId, + agentId: args.agentId, + message: args.message, + messageType: args.messageType, + }); + + return { + success: result.success, + agentId: args.agentId, + response: result, + isolationVerified: result.isolationVerified || false, + executionTime: result.executionTime, + }; + + } catch (error: any) { + return { + success: false, + agentId: args.agentId, + response: { error: error.message }, + isolationVerified: false, + executionTime: 0, + }; + } + }, +}); + +/** + * Switch context to different agent for improvement/testing + */ +export const switchToAgent = action({ + args: { + swarmId: v.string(), + targetAgentId: v.id("agents"), + reason: v.string(), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + message: string; + agentInfo: AgentAddress; + availableActions: string[]; + }> => { + try { + // Get swarm and agent info + const swarm = await ctx.runQuery(internal.swarmTestingOrchestrator.getSwarm, { swarmId: args.swarmId }); + if (!swarm) { + throw new Error("Swarm not found"); + } + + const agent = await ctx.runQuery(internal.agents.getInternal, { id: args.targetAgentId }); + if (!agent) { + throw new Error("Agent not found"); + } + + // Verify agent is part of swarm + const isOrchestrator = args.targetAgentId === swarm.orchestratorAgentId; + const isWorker = swarm.agentIds.includes(args.targetAgentId); + + if (!isOrchestrator && !isWorker) { + throw new Error("Agent is not part of this swarm"); + } + + const agentInfo: AgentAddress = { + swarmId: args.swarmId, + agentId: args.targetAgentId, + agentName: agent.name, + role: isOrchestrator ? "orchestrator" : "worker", + status: "active", + lastActivity: Date.now(), + }; + + // Available actions based on role + const availableActions = isOrchestrator + ? [ + "coordinate_swarm", + "analyze_results", + "delegate_tasks", + "summarize_discussion", + "make_decision" + ] + : [ + "execute_task", + "provide_expertise", + "collaborate", + "report_status", + "request_assistance" + ]; + + return { + success: true, + message: `Switched to agent "${agent.name}" (${agentInfo.role}) in swarm "${swarm.name}". ${args.reason}`, + agentInfo, + availableActions, + }; + + } catch (error: any) { + return { + success: false, + message: `Failed to switch agent: ${error.message}`, + agentInfo: {} as AgentAddress, + availableActions: [], + }; + } + }, +}); + +/** + * Test swarm coordination and isolation + */ +export const testSwarmCoordination = action({ + args: { + swarmId: v.string(), + testScenario: v.optional(v.union( + v.literal("parallel_processing"), + v.literal("sequential_workflow"), + v.literal("decision_making"), + v.literal("conflict_resolution"), + v.literal("resource_sharing") + )), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + testResults: SwarmTestSession; + recommendations: string[]; + isolationScore: number; + coordinationScore: number; + }> => { + try { + const swarm = await ctx.runQuery(internal.swarmTestingOrchestrator.getSwarm, { swarmId: args.swarmId }); + if (!swarm) { + throw new Error("Swarm not found"); + } + + const testSession: SwarmTestSession = { + id: `test-${Date.now()}`, + swarmId: args.swarmId, + testType: "coordination", + status: "running", + results: [], + startedAt: Date.now(), + }; + + // Test scenarios + const scenarios = { + parallel_processing: "Process this dataset in parallel: [1,2,3,4,5,6,7,8,9,10]. Each agent should handle 2 numbers and return their squares.", + sequential_workflow: "Execute this workflow: Agent A analyzes text, Agent B summarizes, Agent C generates recommendations. Text: 'AI is transforming healthcare through better diagnostics and personalized treatment.'", + decision_making: "Make a group decision: Should we prioritize speed or accuracy for this AI system? Each agent should provide their reasoning and vote.", + conflict_resolution: "Resolve this conflict: Two agents have different approaches to solve the same problem. Help them reach consensus.", + resource_sharing: "Coordinate resource sharing: Multiple agents need access to the same data. Ensure proper access control and no conflicts.", + }; + + const testMessage = scenarios[args.testScenario || "parallel_processing"]; + + // Execute swarm test + const swarmResult = await ctx.runAction(api.swarmTestingOrchestrator.sendMessageToSwarm, { + swarmId: args.swarmId, + message: testMessage, + messageType: "command", + }); + + // Analyze results + const results: SwarmTestResult[] = []; + let totalIsolationScore = 0; + let totalCoordinationScore = 0; + + for (const [agentId, response] of Object.entries(swarmResult.responses)) { + const agent = await ctx.runQuery(internal.agents.getInternal, { id: agentId as Id<"agents"> }); + const resp = response as any; + + results.push({ + agentId, + agentName: agent?.name || "Unknown", + success: resp.success, + response: resp.content || resp.response, + executionTime: resp.executionTime || 0, + error: resp.error, + isolationVerified: swarmResult.isolationStatus[agentId], + }); + + if (resp.success) { + totalCoordinationScore += 1; + } + if (swarmResult.isolationStatus[agentId]) { + totalIsolationScore += 1; + } + } + + testSession.results = results; + testSession.status = "completed"; + testSession.completedAt = Date.now(); + + const isolationScore = (totalIsolationScore / results.length) * 100; + const coordinationScore = (totalCoordinationScore / results.length) * 100; + + // Generate recommendations + const recommendations: string[] = []; + + if (isolationScore < 80) { + recommendations.push("Improve agent isolation - some agents may be sharing state"); + } + + if (coordinationScore < 70) { + recommendations.push("Enhance coordination protocols - agents are not working together effectively"); + } + + if (results.some(r => r.executionTime > 30000)) { + recommendations.push("Optimize agent performance - some agents are taking too long to respond"); + } + + if (results.filter(r => r.success).length < results.length * 0.8) { + recommendations.push("Improve agent reliability - too many agents are failing"); + } + + return { + success: true, + testResults: testSession, + recommendations, + isolationScore, + coordinationScore, + }; + + } catch (error: any) { + return { + success: false, + testResults: { + id: "", + swarmId: args.swarmId, + testType: "coordination", + status: "failed", + results: [], + startedAt: Date.now(), + }, + recommendations: [`Test failed: ${error.message}`], + isolationScore: 0, + coordinationScore: 0, + }; + } + }, +}); + +/** + * Detect and setup local models for swarm + */ +export const detectAndSetupLocalModels = action({ + args: { + swarmId: v.string(), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + detectedModels: any[]; + setupRecommendations: string[]; + autoSetupPerformed: boolean; + }> => { + try { + // Detect available local models + const detectionResult = await ctx.runAction(internal.localModelDetector.detectLocalModels, {}); + + if (detectionResult.detected.length === 0) { + // Auto-setup Ollama if nothing detected + // NOTE: process.platform here is the Convex server's OS, not the end user's. + // Default to "linux" for cloud environment; client should provide actual platform. + const setupResult = await ctx.runAction(internal.localModelDetector.setupOllama, { + platform: process.env.TARGET_PLATFORM || "linux", + installModels: ["llama3.2:3b", "mistral:7b"], + }); + + return { + success: setupResult.success, + detectedModels: [], + setupRecommendations: setupResult.success + ? ["Ollama installed successfully with recommended models"] + : [`Ollama setup failed: ${setupResult.message}`], + autoSetupPerformed: true, + }; + } + + // Test detected models + const testedModels = []; + for (const model of detectionResult.detected) { + if (model.endpoint) { + const testResult = await ctx.runAction(internal.localModelDetector.testLocalModel, { + provider: model.provider as "ollama" | "llamacpp" | "lmstudio", + endpoint: model.endpoint, + modelName: model.models?.[0], + }); + + testedModels.push({ + ...model, + connectivityTest: testResult, + }); + } + } + + return { + success: true, + detectedModels: testedModels, + setupRecommendations: detectionResult.recommendations, + autoSetupPerformed: false, + }; + + } catch (error: any) { + return { + success: false, + detectedModels: [], + setupRecommendations: [`Model detection failed: ${error.message}`], + autoSetupPerformed: false, + }; + } + }, +}); + +/** + * Get swarm status and agent information + */ +export const getSwarmStatus = query({ + args: { + swarmId: v.string(), + }, + handler: async (ctx, args): Promise<{ + swarm: SwarmDefinition | null; + agents: AgentAddress[]; + recentActivity: any[]; + healthStatus: { + overall: "healthy" | "warning" | "error"; + isolationScore: number; + coordinationScore: number; + }; + }> => { + const swarm = await ctx.runQuery(internal.swarmTestingOrchestrator.getSwarm, { swarmId: args.swarmId }); + + if (!swarm) { + return { + swarm: null, + agents: [], + recentActivity: [], + healthStatus: { overall: "error", isolationScore: 0, coordinationScore: 0 }, + }; + } + + // Get agent details + const agents: AgentAddress[] = []; + + // Orchestrator + const orchestrator = await ctx.runQuery(internal.agents.getInternal, { id: swarm.orchestratorAgentId }); + if (orchestrator) { + agents.push({ + swarmId: args.swarmId, + agentId: swarm.orchestratorAgentId, + agentName: orchestrator.name, + role: "orchestrator", + status: "active", + lastActivity: Date.now(), + }); + } + + // Workers + for (const agentId of swarm.agentIds) { + const agent = await ctx.runQuery(internal.agents.getInternal, { id: agentId }); + if (agent) { + agents.push({ + swarmId: args.swarmId, + agentId, + agentName: agent.name, + role: "worker", + status: "active", + lastActivity: Date.now(), + }); + } + } + + // Mock health status (in real implementation, this would be calculated from recent tests) + const healthStatus = { + overall: "healthy" as const, + isolationScore: 95, + coordinationScore: 88, + }; + + return { + swarm, + agents, + recentActivity: [], // Would be populated from communication logs + healthStatus, + }; + }, +}); + +/** + * List all user swarms + */ +export const listUserSwarms = query({ + handler: async (ctx): Promise => { + // In a real implementation, this would query the database + // For now, return empty array + return []; + }, +}); + +// Internal functions for data storage (would be replaced with proper database tables) + +export const storeSwarm = internalAction({ + args: { + swarm: v.any(), + }, + handler: async (ctx, args) => { + // Store swarm definition (in-memory for now) + console.log("Storing swarm:", args.swarm); + }, +}); + +export const getSwarm = internalQuery({ + args: { + swarmId: v.string(), + }, + handler: async (ctx, args): Promise => { + // Retrieve swarm definition (mock implementation) + console.log("Retrieving swarm:", args.swarmId); + return null; + }, +}); + +/** + * Create swarm from Strands Agents tool invocation + * When an agent uses the swarm tool, this creates the swarm infrastructure + */ +export const createSwarmFromToolInvocation = internalAction({ + args: { + parentAgentId: v.id("agents"), + toolInvocation: v.object({ + toolName: v.string(), // "swarm", "graph", or "workflow" + parameters: v.any(), // Tool parameters from Strands Agents + conversationId: v.optional(v.id("interleavedConversations")), + }), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + swarmId?: string; + message: string; + createdAgents?: string[]; + }> => { + try { + const { toolName, parameters } = args.toolInvocation; + + // Extract agent configurations from tool parameters + const agentConfigs = parameters.agents || parameters.nodes || []; + const strategy = parameters.strategy || parameters.executionMode || "parallel"; + + if (!Array.isArray(agentConfigs) || agentConfigs.length === 0) { + throw new Error("No agent configurations provided in tool invocation"); + } + + // Create individual agents from configurations + const createdAgentIds: Id<"agents">[] = []; + + for (const config of agentConfigs) { + const agentData = { + name: config.name || `Swarm Agent ${createdAgentIds.length + 1}`, + description: config.description || `Agent created from ${toolName} tool`, + model: config.model || "claude-3.5-sonnet", + modelProvider: config.modelProvider || "bedrock", + systemPrompt: config.systemPrompt || `You are a specialized agent in a ${toolName} operation.`, + tools: config.tools || [], + createdBy: args.parentAgentId, // Inherit from parent agent + tier: "freemium" as const, + isPublic: false, + tags: [`${toolName}-generated`, "swarm-member"], + generatedCode: config.generatedCode || "", + ollamaEndpoint: config.ollamaEndpoint, + modelSwitchingConfig: config.modelSwitchingConfig, + }; + + // Use parent agent as placeholder since we don't have create mutation + createdAgentIds.push(args.parentAgentId); + } + + // Create swarm with the first agent as orchestrator + const swarmResult = await ctx.runAction(api.swarmTestingOrchestrator.createSwarm, { + name: `${toolName.charAt(0).toUpperCase() + toolName.slice(1)} Swarm`, + orchestratorAgentId: createdAgentIds[0], + agentIds: createdAgentIds.slice(1), + communicationProtocol: strategy === "hierarchical" ? "hierarchical" : + strategy === "sequential" ? "a2a" : "broadcast", + deploymentMode: parameters.deploymentMode || "lambda", + localModelProvider: parameters.localModelProvider, + }); + + if (!swarmResult.success) { + throw new Error(`Failed to create swarm: ${swarmResult.message}`); + } + + return { + success: true, + swarmId: swarmResult.swarmId, + message: `Created ${toolName} swarm with ${createdAgentIds.length} agents`, + createdAgents: createdAgentIds.map(id => id.toString()), + }; + + } catch (error: any) { + return { + success: false, + message: `Failed to create swarm from tool invocation: ${error.message}`, + }; + } + }, +}); + +/** + * Execute swarm operation from Strands Agents tool + * This bridges the gap between tool invocation and swarm execution + */ +export const executeSwarmFromTool = internalAction({ + args: { + swarmId: v.string(), + toolInvocation: v.object({ + toolName: v.string(), + parameters: v.any(), + executionMode: v.union(v.literal("parallel"), v.literal("sequential"), v.literal("orchestrated")), + }), + parentConversationId: v.optional(v.id("interleavedConversations")), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + results: any[]; + coordinationLog: string[]; + executionSummary: string; + }> => { + try { + const { toolName, parameters, executionMode } = args.toolInvocation; + + // Map tool parameters to swarm message + let swarmMessage = parameters.message || parameters.input || parameters.task || "Execute swarm operation"; + + // Add context based on tool type + switch (toolName) { + case "swarm": + swarmMessage = `[SWARM EXECUTION] ${swarmMessage}\nStrategy: ${parameters.strategy || 'parallel'}`; + break; + case "graph": + swarmMessage = `[GRAPH EXECUTION] ${swarmMessage}\nGraph structure: ${JSON.stringify(parameters.nodes || [])}`; + break; + case "workflow": + swarmMessage = `[WORKFLOW EXECUTION] ${swarmMessage}\nSteps: ${JSON.stringify(parameters.steps || [])}`; + break; + } + + // Execute the swarm + const result = await ctx.runAction(api.swarmTestingOrchestrator.sendMessageToSwarm, { + swarmId: args.swarmId, + message: swarmMessage, + messageType: "command", + }); + + if (!result.success) { + throw new Error("Swarm execution failed"); + } + + // Format results for Strands Agents + const formattedResults = result.responses.map(([agentId, response]: [string, any]) => ({ + agentId, + success: response.success, + output: response.content || response.response, + executionTime: response.executionTime, + error: response.error, + })); + + const successCount = formattedResults.filter((r: any) => r.success).length; + const executionSummary = `${toolName} execution completed: ${successCount}/${formattedResults.length} agents succeeded`; + + return { + success: result.success, + results: formattedResults, + coordinationLog: result.coordinationLog, + executionSummary, + }; + + } catch (error: any) { + return { + success: false, + results: [], + coordinationLog: [`Error: ${error.message}`], + executionSummary: `Execution failed: ${error.message}`, + }; + } + }, +}); + +/** + * Test swarm created from Strands Agents tools + */ +export const testStrandsSwarm = action({ + args: { + swarmId: v.string(), + testType: v.union(v.literal("isolation"), v.literal("communication"), v.literal("performance")), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + testResults: any; + recommendations: string[]; + strandsCompliance: boolean; + }> => { + try { + const swarm = await ctx.runQuery(internal.swarmTestingOrchestrator.getSwarm, { swarmId: args.swarmId }); + if (!swarm) { + throw new Error("Swarm not found"); + } + + let testResults; + const recommendations: string[] = []; + + switch (args.testType) { + case "isolation": + // Test that agents created from tools are properly isolated + testResults = await ctx.runAction(api.swarmTestingOrchestrator.testSwarmCoordination, { + swarmId: args.swarmId, + testScenario: "parallel_processing", + }); + break; + + case "communication": + // Test communication protocols work with tool-generated agents + testResults = await ctx.runAction(api.swarmTestingOrchestrator.sendMessageToSwarm, { + swarmId: args.swarmId, + message: "Test communication protocol between tool-generated agents", + messageType: "query", + }); + break; + + case "performance": { + // Test performance of tool-generated swarm + const startTime = Date.now(); + await ctx.runAction(api.swarmTestingOrchestrator.sendMessageToSwarm, { + swarmId: args.swarmId, + message: "Performance test: respond with 'ACK'", + messageType: "command", + }); + const executionTime = Date.now() - startTime; + + testResults = { + success: true, + executionTime, + performance: executionTime < 5000 ? "excellent" : executionTime < 10000 ? "good" : "needs_improvement", + }; + break; + } + } + + // Check Strands Agents compliance + const strandsCompliance = await checkStrandsCompliance(ctx, args.swarmId); + + if (!strandsCompliance) { + recommendations.push("Swarm does not fully comply with Strands Agents specifications"); + recommendations.push("Review agent creation parameters and tool invocation format"); + } + + return { + success: true, + testResults, + recommendations, + strandsCompliance, + }; + + } catch (error: any) { + return { + success: false, + testResults: { error: error.message }, + recommendations: ["Test execution failed - check swarm configuration"], + strandsCompliance: false, + }; + } + }, +}); + +/** + * Check if swarm complies with Strands Agents specifications + */ +async function checkStrandsCompliance(ctx: any, swarmId: string): Promise { + try { + const swarm = await ctx.runQuery(internal.swarmTestingOrchestrator.getSwarm, { swarmId }); + + if (!swarm) return false; + + // Check required Strands Agents properties + const requiredProperties = [ + 'orchestratorAgentId', + 'agentIds', + 'communicationProtocol', + 'isolationLevel' + ]; + + for (const prop of requiredProperties) { + if (!swarm[prop as keyof typeof swarm]) return false; + } + + // Check that all agents exist + const orchestrator = await ctx.runQuery(internal.agents.getInternal, { id: swarm.orchestratorAgentId }); + if (!orchestrator) return false; + + for (const agentId of swarm.agentIds) { + const agent = await ctx.runQuery(internal.agents.getInternal, { id: agentId }); + if (!agent) return false; + } + + // Check communication protocol is valid + const validProtocols = ['broadcast', 'a2a', 'hierarchical']; + if (!validProtocols.includes(swarm.communicationProtocol)) return false; + + return true; + + } catch (error) { + console.error("Compliance check failed:", error); + return false; + } +} + +export const executeAgentInIsolation = internalAction({ + args: { + swarmId: v.string(), + agentId: v.id("agents"), + message: v.string(), + messageType: v.optional(v.string()), + }, + handler: async (ctx, args): Promise<{ + success: boolean; + content?: string; + executionTime: number; + error?: string; + isolationVerified: boolean; + }> => { + const startTime = Date.now(); + + try { + // Add swarm context to message + const swarmMessage = `${args.message}\n\n[SWARM CONTEXT] You are operating in complete isolation. You cannot access other agents' state or communicate directly with them.`; + + // Execute agent using unified execution + const result = await ctx.runAction(api.unifiedAgentExecution.executeUnifiedAgent, { + agentId: args.agentId, + message: swarmMessage, + }); + + const executionTime = Date.now() - startTime; + + // Verify isolation (mock - in real implementation, this would check for cross-agent data access) + const isolationVerified = !result.content?.includes("cross-agent") && + !result.content?.includes("other agents") && + !result.content?.includes("shared state"); + + return { + success: result.success, + content: result.content, + executionTime, + error: result.error, + isolationVerified, + }; + + } catch (error: any) { + return { + success: false, + executionTime: Date.now() - startTime, + error: error.message, + isolationVerified: false, + }; + } + }, +}); diff --git a/convex/testExecution.ts b/convex/testExecution.ts index df84683..e529868 100644 --- a/convex/testExecution.ts +++ b/convex/testExecution.ts @@ -3,13 +3,14 @@ * * Manages the complete lifecycle of agent tests from submission to completion. * Provides real-time log streaming and test management. + * + * Cost-optimized execution: Direct Bedrock → Lambda backup → No MCP complexity */ import { mutation, query, internalMutation, internalQuery, action } from "./_generated/server"; import { v } from "convex/values"; import { internal, api } from "./_generated/api"; import { getAuthUserId } from "@convex-dev/auth/server"; -// Removed unused import: Id // Validation constants const MAX_QUERY_LENGTH = 2000; @@ -20,6 +21,29 @@ const MIN_TIMEOUT = 10000; // 10 seconds const MAX_TIMEOUT = 600000; // 10 minutes const MAX_CONCURRENT_TESTS = parseInt(process.env.MAX_CONCURRENT_TESTS || "10"); +// Rate Limiting - from centralized tier config (convex/lib/tierConfig.ts) +import { getTierConfig, checkExecutionLimit, isProviderAllowedForTier, getUpgradeMessage } from "./lib/tierConfig"; +import { checkRateLimitInMutation, buildTierRateLimitConfig } from "./rateLimiter"; + +// Usage increment + overage reporting — single source of truth in stripeMutations.ts. +import { incrementUsageAndReportOverageImpl } from "./stripeMutations"; + +// Model registry — authoritative source for cost data +import { BEDROCK_MODELS } from "./modelRegistry"; + +// Cost calculation helper — reads pricing from the authoritative model registry. +// Falls back to Haiku 4.5 pricing ($1/$5 per 1M tokens) for unknown models. +function calculateBedrockCost(usage: any, modelId: string): number { + const model = BEDROCK_MODELS[modelId]; + const cost = model?.costPer1MTokens ?? { input: 1.0, output: 5.0 }; + + // costPer1MTokens is per 1,000,000 tokens; convert to per-token then multiply + const inputCost = ( usage.inputTokens || 0 ) * ( cost.input / 1_000_000 ); + const outputCost = ( usage.outputTokens || 0 ) * ( cost.output / 1_000_000 ); + + return Math.round( ( inputCost + outputCost ) * 100 ); // Return cents +} + /** * Submit a new agent test */ @@ -38,9 +62,12 @@ export const submitTest = mutation({ }, handler: async (ctx, args) => { // Authentication - use getAuthUserId for Convex user document ID - // Allow anonymous users to test agents const userId = await getAuthUserId(ctx); - const effectiveUserId = userId || `anon_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; + + // SECURITY: Require authentication for testing + if (!userId) { + throw new Error("Authentication required. Please sign in to test agents."); + } // Get agent const agent = await ctx.db.get(args.agentId); @@ -48,25 +75,72 @@ export const submitTest = mutation({ throw new Error("Agent not found"); } - // Verify ownership or public access + // SECURITY: Verify ownership const isOwner = agent.createdBy === userId; const isPublic = Boolean(agent.isPublic); - // Allow testing for authenticated users (development mode) - console.log("Authorization check:", { - userId, - agentCreatedBy: agent.createdBy, - isOwner, - isPublic, - agentId: args.agentId, - allowing: "all authenticated users" - }); - - // Authorization is relaxed for development - // In production, uncomment this to enforce strict ownership: - // if (!isOwner && !isPublic) { - // throw new Error("Not authorized to test this agent"); - // } + if (!isOwner && !isPublic) { + throw new Error("Not authorized to test this agent. You can only test agents you created or public agents."); + } + + // Determine model provider EARLY to check if it's Ollama + // Ollama models are FREE (run locally), so no rate limiting needed! + const isOllamaModel = agent.deploymentType === "ollama" || (!agent.deploymentType && agent.model.includes(':') && !agent.model.includes('.')); + + // RATE LIMITING: Only for Bedrock/cloud models (Ollama is FREE and unlimited!) + if (!isOllamaModel) { + const user = await ctx.db.get(userId); + if (!user) { + throw new Error("User not found"); + } + + // ANONYMOUS USER PROTECTION: Block anonymous users from cloud testing to prevent abuse + // Anonymous users can still use Ollama for unlimited FREE testing + if (user.isAnonymous) { + throw new Error( + `Anonymous users cannot use cloud models to prevent abuse. ` + + `Please sign in with GitHub or Google for cloud testing, ` + + `or use Ollama models for unlimited FREE testing without sign-in.` + ); + } + + const userTier = user.tier || "freemium"; + + // PROVIDER TIER GATE: Enforce per-tier allowed provider rules (mirrors + // strandsAgentExecution.ts and strandsAgentExecutionDynamic.ts logic). + const derivedProvider = agent.deploymentType || "bedrock"; + if ( !isProviderAllowedForTier( userTier, derivedProvider ) ) { + const tierConfig = getTierConfig( userTier ); + throw new Error( + `${tierConfig.displayName} tier does not allow ${derivedProvider} models. ` + + `Allowed providers: ${tierConfig.allowedProviders.join( ", " )}. ` + + `Use Ollama models for unlimited FREE testing, or upgrade your subscription.` + ); + } + + const executionsThisMonth = user.executionsThisMonth || 0; + + // Check rate limits using centralized tier config + const limitCheck = checkExecutionLimit( userTier, executionsThisMonth ); + if ( !limitCheck.allowed ) { + const tierConfig = getTierConfig( userTier ); + throw new Error( + `${tierConfig.displayName} tier cloud test limit reached (${tierConfig.monthlyExecutions} tests/month). ` + + `You can: 1) Use Ollama models for unlimited FREE testing, ` + + `2) Upgrade to Personal ($5/month) for more capacity, or 3) Deploy to your AWS account.` + ); + } + + // Per-minute rate limiting (tier-aware): prevents burst abuse + const tierCfgForRL = getTierConfig(userTier); + const rlConfig = buildTierRateLimitConfig(tierCfgForRL.maxConcurrentTests, "agentTesting"); + const rlResult = await checkRateLimitInMutation(ctx, String(userId), "agentTesting", rlConfig); + if (!rlResult.allowed) { + throw new Error(rlResult.reason || "Rate limited - too many requests per minute"); + } + } + + const effectiveUserId = userId; // Validate test query if (!args.testQuery || args.testQuery.length < 1 || args.testQuery.length > MAX_QUERY_LENGTH) { @@ -143,6 +217,11 @@ export const submitTest = mutation({ // Update test status to QUEUED await ctx.db.patch(testId, { status: "QUEUED" }); + // BILLING: Increment user's weighted execution units ONLY for cloud models (not Ollama) + if (!isOllamaModel) { + await incrementUsageAndReportOverageImpl( ctx, userId, { updateLastTestAt: true, modelId: agent.model } ); + } + // Trigger queue processor immediately (on-demand processing to save costs) await ctx.scheduler.runAfter(0, internal.queueProcessor.processQueue); @@ -228,16 +307,20 @@ export const cancelTest = mutation({ args: { testId: v.id("testExecutions") }, handler: async (ctx, args) => { const userId = await getAuthUserId(ctx); - + + // SECURITY: Require authentication + if (!userId) { + throw new Error("Authentication required. Please sign in to cancel tests."); + } + const test = await ctx.db.get(args.testId); if (!test) { throw new Error("Test not found"); } - // Allow anonymous users to cancel their own tests - // For authenticated users, verify ownership - if (userId && test.userId !== userId) { - throw new Error("Not authorized"); + // SECURITY: Verify ownership + if (test.userId !== userId) { + throw new Error("Not authorized to cancel this test."); } if (test.status === "COMPLETED" || test.status === "FAILED") { @@ -307,20 +390,57 @@ export const retryTest = mutation({ }, handler: async (ctx, args) => { const userId = await getAuthUserId(ctx); - + + // SECURITY: Require authentication + if (!userId) { + throw new Error("Authentication required. Please sign in to retry tests."); + } + const originalTest = await ctx.db.get(args.testId); if (!originalTest) { throw new Error("Test not found"); } - // Allow anonymous users to retry their own tests - // For authenticated users, verify ownership - if (userId && originalTest.userId !== userId) { - throw new Error("Not authorized"); + // SECURITY: Verify ownership + if (originalTest.userId !== userId) { + throw new Error("Not authorized to retry this test."); + } + + // Check if this is an Ollama test (FREE and unlimited!) + const agent = await ctx.db.get(originalTest.agentId); + const isOllamaModel = agent ? (agent.model.includes(':') || agent.deploymentType === "ollama") : false; + + // RATE LIMITING: Only for cloud models (Ollama is FREE!) + if (!isOllamaModel) { + const user = await ctx.db.get(userId); + if (!user) { + throw new Error("User not found"); + } + + const userTier = user.tier || "freemium"; + const executionsThisMonth = user.executionsThisMonth || 0; + + // Check rate limits using centralized tier config + const retestLimitCheck = checkExecutionLimit(userTier, executionsThisMonth); + if (!retestLimitCheck.allowed) { + const retestTierCfg = getTierConfig(userTier); + throw new Error( + `${retestTierCfg.displayName} tier cloud test limit reached ` + + `(${retestTierCfg.monthlyExecutions} tests/month). ` + + getUpgradeMessage(userTier) + ); + } + + // Per-minute rate limiting (tier-aware): prevents burst abuse + const retestTierCfgRL = getTierConfig(userTier); + const retestRLConfig = buildTierRateLimitConfig(retestTierCfgRL.maxConcurrentTests, "agentTesting"); + const retestRLResult = await checkRateLimitInMutation(ctx, String(userId), "agentTesting", retestRLConfig); + if (!retestRLResult.allowed) { + throw new Error(retestRLResult.reason || "Rate limited - too many requests per minute"); + } } - // Use original userId (which might be anonymous temp ID) - const effectiveUserId = userId || originalTest.userId; + const effectiveUserId = userId; // Create new test with same configuration const newTestId = await ctx.db.insert("testExecutions", { @@ -350,6 +470,12 @@ export const retryTest = mutation({ await ctx.db.patch(newTestId, { status: "QUEUED" }); + // BILLING: Increment user's weighted execution units ONLY for cloud models (not Ollama) + if (!isOllamaModel) { + const retryModelId = originalTest.modelConfig?.modelId || originalTest.modelProvider || "anthropic.claude-haiku-4-5-20251001-v1:0"; + await incrementUsageAndReportOverageImpl( ctx, userId, { updateLastTestAt: true, modelId: retryModelId } ); + } + // Trigger queue processor immediately (on-demand processing) await ctx.scheduler.runAfter(0, internal.queueProcessor.processQueue); @@ -439,7 +565,7 @@ export const updateStatus = internalMutation({ if (args.response) { updates.response = args.response; } - + // Add assistant response to conversation if exists if (test.conversationId && args.response && args.success) { await ctx.runMutation(internal.conversations.addMessageInternal, { @@ -664,7 +790,7 @@ function extractModelConfig(model: string, deploymentType: string): { }, }; } - + if (deploymentType === "ollama") { return { modelProvider: "ollama", @@ -678,8 +804,8 @@ function extractModelConfig(model: string, deploymentType: string): { // Determine based on model ID format // Bedrock models: anthropic.*, amazon.*, ai21.*, cohere.*, meta.*, mistral.* - if (model.startsWith("anthropic.") || - model.startsWith("amazon.") || + if (model.startsWith("anthropic.") || + model.startsWith("amazon.") || model.startsWith("ai21.") || model.startsWith("cohere.") || model.startsWith("meta.") || @@ -693,7 +819,7 @@ function extractModelConfig(model: string, deploymentType: string): { }, }; } - + // Ollama models: contain colon (e.g., "llama3:8b", "qwen3:4b") if (model.includes(':')) { return { @@ -743,35 +869,50 @@ async function getQueuePosition(ctx: any, testId: string): Promise { } /** - * Execute agent directly (for MCP tool invocation) - * - * DEPRECATED: This function is no longer used. - * Use api.strandsAgentExecution.executeAgentWithStrandsAgents instead, - * which is fully event-driven and calls AgentCore directly without polling. - * - * Kept for backward compatibility only. + * Increment user usage counter (internal) + * Tracks successful test executions only */ -export const executeAgent = action({ +export const incrementUserUsage = internalMutation({ args: { - agentId: v.id("agents"), - input: v.string(), + userId: v.id("users"), + testId: v.id("testExecutions"), + usage: v.optional(v.object({ + inputTokens: v.number(), + outputTokens: v.number(), + totalTokens: v.number(), + })), + executionTime: v.optional(v.number()), + executionMethod: v.optional(v.string()), + modelId: v.optional(v.string()), }, - handler: async (ctx, args): Promise<{ - success: boolean; - response: string | null; - error?: string; - }> => { - // Redirect to event-driven execution - const result = await ctx.runAction(api.strandsAgentExecution.executeAgentWithStrandsAgents, { - agentId: args.agentId, - message: args.input, - // No conversationId for MCP tool invocations (stateless) + handler: async (ctx, args) => { + const user = await ctx.db.get(args.userId); + if (!user) return; + + // NOTE: executionsThisMonth is already incremented in submitTest for cloud models. + // Only update token usage and execution time here to avoid double-counting. + await ctx.db.patch(args.userId, { + lastTestAt: Date.now(), + totalTokensUsed: (user.totalTokensUsed || 0) + (args.usage?.totalTokens || 0), + totalExecutionTime: (user.totalExecutionTime || 0) + (args.executionTime || 0), }); - return { - success: result.success, - response: result.content || null, - error: result.error, - }; + // LOG ONLY WHEN USED (no background processes) + await ctx.runMutation(internal.auditLogs.logEvent, { + eventType: "test_execution", + userId: args.userId, + action: "test_completed", + resource: "test_execution", + resourceId: args.testId, + success: true, + details: { + tier: user.tier, + executionsThisMonth: user.executionsThisMonth || 0, + tokenUsage: args.usage, + executionTime: args.executionTime, + executionMethod: args.executionMethod, + estimatedCost: args.usage ? calculateBedrockCost(args.usage, args.modelId || "anthropic.claude-3-5-sonnet-20241022-v2:0") : 0, + }, + }); }, }); diff --git a/convex/testHelpers.test.ts b/convex/testHelpers.test.ts index dfbc5ea..50c7fc6 100644 --- a/convex/testHelpers.test.ts +++ b/convex/testHelpers.test.ts @@ -57,7 +57,6 @@ export { TEST_CONSTANTS }; // ============================================================================ export interface TestUser { - userId: string; email: string; name: string; tier?: string; @@ -102,7 +101,6 @@ export const createTestUser = async ( ): Promise => { return await t.run(async (ctx: any) => { return await ctx.db.insert("users", { - userId, email: `${userId}@example.com`, name: `${userId} User`, tier: "personal", diff --git a/convex/toolModelIntegration.test.ts b/convex/toolModelIntegration.test.ts index 216502e..bf69fbc 100644 --- a/convex/toolModelIntegration.test.ts +++ b/convex/toolModelIntegration.test.ts @@ -25,7 +25,6 @@ describe("Tool Loading and Configuration", () => { // Create test user testUserId = await t.run(async (ctx: any) => { return await ctx.db.insert("users", { - userId: "test-user-tools", email: "tools@test.com", name: "Tools Test User", tier: "personal", @@ -382,7 +381,7 @@ describe("Tool Loading and Configuration", () => { return await ctx.db.insert("agents", { name: "Specific Model Agent", description: "Agent with specific model", - model: "anthropic.claude-haiku-4-5-20250514-v1:0", + model: "anthropic.claude-haiku-4-5-20251001-v1:0", systemPrompt: "You are a helpful assistant", tools: [], generatedCode: "# Agent code", @@ -394,7 +393,7 @@ describe("Tool Loading and Configuration", () => { const agent = await t.query(api.agents.get, { id: testAgentId }); expect(agent).toBeDefined(); - expect(agent.model).toBe("anthropic.claude-haiku-4-5-20250514-v1:0"); + expect(agent.model).toBe("anthropic.claude-haiku-4-5-20251001-v1:0"); expect(agent.deploymentType).toBe("bedrock"); }); }); diff --git a/convex/tools.ts b/convex/tools.ts new file mode 100644 index 0000000..919cf19 --- /dev/null +++ b/convex/tools.ts @@ -0,0 +1,835 @@ +/** + * Executable Tools for Visual Scripting + * + * These are the ACTUAL functions that execute when you drag a tool onto the canvas. + * Each function is exported as a Convex action and can be called from the UI. + * + * Memory tools persist data in the Convex toolMemory table. + * Reasoning tools invoke LLMs via Bedrock/Ollama through executeComposedMessages. + */ + +"use node"; + +import { action } from "./_generated/server"; +import { internal } from "./_generated/api"; +import { v } from "convex/values"; +import { findToolMetadata, normalizeToolName } from "./lib/strandsTools"; +import { executeComposedMessages } from "./lib/messageExecutor"; +import type { ComposedMessages } from "../src/engine/messageComposer"; +import type { TokenUsage } from "./lib/tokenBilling"; + +/** Shape of entries returned from internal.lib.memoryStore queries */ +interface MemoryEntry { + _id: unknown; + _creationTime: number; + userId: string; + memoryType: string; + key: string; + value: string; + metadata?: string; + ttl?: number; + createdAt: number; + updatedAt: number; +} + +/* ────────────────────────────────────────────────────────────── + * Helper: derive a userId scope from the auth context. + * Falls back to "anonymous" so tools work during development. + * ────────────────────────────────────────────────────────────── */ +async function resolveUserId(ctx: any): Promise { + try { + const identity = await ctx.auth.getUserIdentity(); + return identity?.subject || identity?.tokenIdentifier || "anonymous"; + } catch { + return "anonymous"; + } +} + +/* ────────────────────────────────────────────────────────────── + * Helper: safely parse JSON from memory store values. + * Returns raw string if parsing fails (corrupted data). + * ────────────────────────────────────────────────────────────── */ +function safeJsonParse(value: string): unknown { + try { + return JSON.parse(value); + } catch { + return value; + } +} + +/* ────────────────────────────────────────────────────────────── + * Helper: invoke a model and return the text response. + * Builds a ComposedMessages payload for executeComposedMessages. + * The model arg is expected to be a Bedrock model ID. + * ────────────────────────────────────────────────────────────── */ +async function invokeLLM( + model: string, + prompt: string, + options?: { temperature?: number; maxTokens?: number } +): Promise<{ text: string; tokenUsage?: TokenUsage }> { + const isOllama = model.includes(":") && !model.includes("."); + + const composed: ComposedMessages = isOllama + ? { + kind: "ollama", + ollama: { + endpoint: process.env.OLLAMA_ENDPOINT || "http://localhost:11434", + model, + messages: [{ role: "user", content: prompt }], + }, + } + : { + kind: "bedrock", + bedrock: { + modelId: model, + messages: [{ role: "user", content: [{ text: prompt }] }], + inferenceConfig: { + temperature: options?.temperature ?? 0.7, + maxTokens: options?.maxTokens ?? 2048, + }, + }, + }; + + const result = await executeComposedMessages(composed); + return { text: result.text, tokenUsage: result.tokenUsage }; +} + +/** + * ============================================================================ + * HUMAN-IN-THE-LOOP TOOLS + * ============================================================================ + */ + +/** + * @tool handoff_to_user + * Hand off control to human for input or decision + */ +export const handoffToUser = action({ + args: { + question: v.string(), + options: v.optional(v.array(v.string())), + currentState: v.optional(v.record(v.string(), v.any())), + requireConfirmation: v.optional(v.boolean()), + timeoutSeconds: v.optional(v.number()), + }, + handler: async (_ctx, args) => { + const handoffId = `handoff_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; + + return { + status: "pending_user_input", + handoffId, + question: args.question, + options: args.options, + currentState: args.currentState, + timestamp: new Date().toISOString(), + message: "Waiting for user input via UI...", + }; + }, +}); + +/** + * ============================================================================ + * MEMORY TOOLS (backed by Convex toolMemory table) + * ============================================================================ + */ + +/** + * @tool short_term_memory + * Store and retrieve short-term conversation memory (with TTL) + */ +export const shortTermMemory = action({ + args: { + operation: v.union(v.literal("store"), v.literal("retrieve"), v.literal("search"), v.literal("clear")), + key: v.string(), + value: v.optional(v.any()), + maxItems: v.optional(v.number()), + ttl: v.optional(v.number()), + }, + handler: async (ctx, args): Promise => { + const userId = await resolveUserId(ctx); + const memoryType = "short_term"; + + switch (args.operation) { + case "store": { + const serialized = JSON.stringify(args.value ?? null); + await ctx.runMutation(internal.lib.memoryStore.store, { + userId, + memoryType, + key: args.key, + value: serialized, + ttl: args.ttl ?? 3600, // default 1h TTL for short-term + }); + return { + result: "stored", + key: args.key, + timestamp: Date.now(), + }; + } + + case "retrieve": { + const entry: MemoryEntry | null = await ctx.runQuery(internal.lib.memoryStore.retrieve, { + userId, + memoryType, + key: args.key, + }); + return { + result: entry ? safeJsonParse(entry.value) : null, + key: args.key, + }; + } + + case "search": { + const entries: MemoryEntry[] = await ctx.runQuery(internal.lib.memoryStore.search, { + userId, + memoryType, + maxItems: args.maxItems ?? 20, + }); + return { + results: entries.map((e: MemoryEntry) => ({ + key: e.key, + value: safeJsonParse(e.value), + updatedAt: e.updatedAt, + })), + query: args.key, + }; + } + + case "clear": { + await ctx.runMutation(internal.lib.memoryStore.remove, { + userId, + memoryType, + key: args.key, + }); + return { + result: "cleared", + key: args.key, + }; + } + + default: + throw new Error(`Unknown operation: ${args.operation}`); + } + }, +}); + +/** + * @tool long_term_memory + * Store and retrieve long-term persistent memory (no TTL) + */ +export const longTermMemory = action({ + args: { + operation: v.union(v.literal("store"), v.literal("retrieve"), v.literal("search"), v.literal("delete")), + key: v.string(), + value: v.optional(v.any()), + metadata: v.optional(v.any()), + enableVersioning: v.optional(v.boolean()), // Reserved: version history not yet implemented + }, + handler: async (ctx, args): Promise => { + const userId = await resolveUserId(ctx); + const memoryType = "long_term"; + + switch (args.operation) { + case "store": { + const serialized = JSON.stringify(args.value ?? null); + const metaSerialized = args.metadata ? JSON.stringify(args.metadata) : undefined; + await ctx.runMutation(internal.lib.memoryStore.store, { + userId, + memoryType, + key: args.key, + value: serialized, + metadata: metaSerialized, + }); + return { + result: "stored", + key: args.key, + timestamp: Date.now(), + }; + } + + case "retrieve": { + const entry: MemoryEntry | null = await ctx.runQuery(internal.lib.memoryStore.retrieve, { + userId, + memoryType, + key: args.key, + }); + return { + result: entry ? safeJsonParse(entry.value) : null, + key: args.key, + metadata: entry?.metadata ? safeJsonParse(entry.metadata) : null, + }; + } + + case "search": { + const entries: MemoryEntry[] = await ctx.runQuery(internal.lib.memoryStore.search, { + userId, + memoryType, + maxItems: 50, + }); + return { + results: entries.map((e: MemoryEntry) => ({ + key: e.key, + value: safeJsonParse(e.value), + metadata: e.metadata ? safeJsonParse(e.metadata) : null, + updatedAt: e.updatedAt, + })), + query: args.key, + }; + } + + case "delete": { + await ctx.runMutation(internal.lib.memoryStore.remove, { + userId, + memoryType, + key: args.key, + }); + return { + result: "deleted", + key: args.key, + }; + } + + default: + throw new Error(`Unknown operation: ${args.operation}`); + } + }, +}); + +/** + * @tool semantic_memory + * Search over stored memory entries (keyword-based until vector DB is connected) + */ +export const semanticMemory = action({ + args: { + query: v.string(), + topK: v.optional(v.number()), + similarityThreshold: v.optional(v.number()), // Reserved: used when vector DB is connected + filters: v.optional(v.record(v.string(), v.string())), // Reserved: used when vector DB is connected + }, + handler: async (ctx, args): Promise => { + const userId = await resolveUserId(ctx); + + // Retrieve all long-term entries and do keyword matching until vector DB is connected + const entries: MemoryEntry[] = await ctx.runQuery(internal.lib.memoryStore.search, { + userId, + memoryType: "long_term", + maxItems: 100, + }); + + const queryLower = args.query.toLowerCase(); + const topK = args.topK || 10; + + const scored: Array<{ entry: MemoryEntry; score: number }> = entries + .map((entry: MemoryEntry) => { + const keyScore = entry.key.toLowerCase().includes(queryLower) ? 1 : 0; + const valueScore = entry.value.toLowerCase().includes(queryLower) ? 0.5 : 0; + return { entry, score: keyScore + valueScore }; + }) + .filter((item: { entry: MemoryEntry; score: number }) => item.score > 0) + .sort((a: { score: number }, b: { score: number }) => b.score - a.score) + .slice(0, topK); + + return { + results: scored.map((item: { entry: MemoryEntry; score: number }) => ({ + key: item.entry.key, + value: safeJsonParse(item.entry.value), + score: item.score, + })), + relevanceScores: scored.map((item: { entry: MemoryEntry; score: number }) => item.score), + query: args.query, + topK, + }; + }, +}); + +/** + * ============================================================================ + * ADVANCED REASONING PATTERN TOOLS (backed by real LLM calls) + * ============================================================================ + */ + +/** + * @tool self_consistency + * Multi-path reasoning with voting: invokes the model N times at varying temperatures + */ +export const selfConsistency = action({ + args: { + problem: v.string(), + model: v.string(), + numPaths: v.optional(v.number()), + votingStrategy: v.optional(v.union(v.literal("majority"), v.literal("weighted"), v.literal("consensus"))), + }, + handler: async (ctx, args) => { + // Gate: enforce tier-based Bedrock access for cloud models + const isOllamaModel = args.model.includes(":") && !args.model.includes("."); + let gateResult: { allowed: true; userId: string; tier: string } | undefined; + if (!isOllamaModel) { + const { requireBedrockAccess } = await import("./lib/bedrockGate"); + const gate = await requireBedrockAccess( + ctx, args.model, + async (lookupArgs) => ctx.runQuery(internal.users.getInternal, lookupArgs), + ); + if (!gate.allowed) { throw new Error(gate.reason); } + gateResult = gate; + } + + const numPaths = args.numPaths || 3; + const answers: string[] = []; + const reasoningPaths: string[] = []; + let totalInputTokens = 0, totalOutputTokens = 0; + + // Generate multiple reasoning paths with different temperatures + for (let i = 0; i < numPaths; i++) { + const temperature = Math.min(1.0, 0.5 + (i * 0.15)); + const prompt = `Solve the following problem step by step. Show your reasoning, then give a final answer on the last line prefixed with "ANSWER: ".\n\nProblem: ${args.problem}`; + + try { + const llmResult = await invokeLLM(args.model, prompt, { temperature, maxTokens: 2048 }); + totalInputTokens += llmResult.tokenUsage?.inputTokens ?? 0; + totalOutputTokens += llmResult.tokenUsage?.outputTokens ?? 0; + reasoningPaths.push(llmResult.text); + + // Extract answer from last line + const lines = llmResult.text.trim().split("\n"); + const answerLine = lines.find((l) => l.startsWith("ANSWER:")) || lines[lines.length - 1]; + answers.push(answerLine.replace(/^ANSWER:\s*/i, "").trim()); + } catch (error: any) { + reasoningPaths.push(`Path ${i + 1} failed: ${error.message}`); + answers.push(`[error: ${error.message}]`); + } + } + + // Meter accumulated token usage + if ((totalInputTokens > 0 || totalOutputTokens > 0) && gateResult) { + await ctx.runMutation(internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateResult.userId as any, + modelId: args.model, + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + }); + } + + // Count votes + const voteCounts: Record = {}; + answers.forEach((answer) => { + voteCounts[answer] = (voteCounts[answer] || 0) + 1; + }); + + const sortedAnswers = Object.entries(voteCounts).sort((a, b) => b[1] - a[1]); + const finalAnswer = sortedAnswers[0][0]; + const confidence = sortedAnswers[0][1] / numPaths; + + return { + finalAnswer, + confidence, + reasoningPaths, + voteDistribution: voteCounts, + numPaths, + }; + }, +}); + +/** + * @tool tree_of_thoughts + * Explore multiple reasoning branches via LLM-generated thought expansion + */ +export const treeOfThoughts = action({ + args: { + problem: v.string(), + model: v.string(), + maxDepth: v.optional(v.number()), + branchingFactor: v.optional(v.number()), + }, + handler: async (ctx, args) => { + // Gate: enforce tier-based Bedrock access for cloud models + const isOllamaModel = args.model.includes(":") && !args.model.includes("."); + let gateResult: { allowed: true; userId: string; tier: string } | undefined; + if (!isOllamaModel) { + const { requireBedrockAccess } = await import("./lib/bedrockGate"); + const gate = await requireBedrockAccess( + ctx, args.model, + async (lookupArgs) => ctx.runQuery(internal.users.getInternal, lookupArgs), + ); + if (!gate.allowed) { throw new Error(gate.reason); } + gateResult = gate; + } + + const maxDepth = args.maxDepth || 3; + const branchingFactor = args.branchingFactor || 2; + const explored: string[] = []; + let bestPath: string[] = [args.problem]; + let bestScore = 0; + let totalInputTokens = 0, totalOutputTokens = 0; + + // Breadth-first expansion with path tracking + let frontier: Array<{ thought: string; path: string[] }> = [ + { thought: args.problem, path: [args.problem] }, + ]; + + for (let depth = 0; depth < maxDepth && frontier.length > 0; depth++) { + const nextFrontier: Array<{ thought: string; path: string[] }> = []; + + for (const { thought, path } of frontier.slice(0, branchingFactor)) { + const expandPrompt = `Given this reasoning step:\n"${thought}"\n\nGenerate ${branchingFactor} possible next reasoning steps. Number them 1), 2), etc. Then rate which is most promising on a scale of 0-10 after "SCORE: ".`; + + try { + const llmResult = await invokeLLM(args.model, expandPrompt, { temperature: 0.8, maxTokens: 1024 }); + totalInputTokens += llmResult.tokenUsage?.inputTokens ?? 0; + totalOutputTokens += llmResult.tokenUsage?.outputTokens ?? 0; + explored.push(llmResult.text); + + // Extract numbered items as next thoughts with path tracking + const items = llmResult.text.match(/\d\)\s*(.+)/g) || []; + const childEntries = items.map((item) => { + const cleaned = item.replace(/^\d\)\s*/, "").trim(); + return { thought: cleaned, path: [...path, cleaned] }; + }); + nextFrontier.push(...childEntries); + + // Extract score and associate with best child path + const scoreMatch = llmResult.text.match(/SCORE:\s*(\d+)/i); + const score = scoreMatch ? parseInt(scoreMatch[1], 10) : 5; + if (score > bestScore) { + bestScore = score; + bestPath = childEntries.length > 0 ? childEntries[0].path : path; + } + } catch (error: any) { + explored.push(`Expansion failed: ${error.message}`); + } + } + + frontier = nextFrontier; + } + + // Meter accumulated token usage + if ((totalInputTokens > 0 || totalOutputTokens > 0) && gateResult) { + await ctx.runMutation(internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateResult.userId as any, + modelId: args.model, + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + }); + } + + return { + bestPath, + confidence: bestScore / 10, + treeStructure: { root: { content: args.problem, depth: 0, explored: explored.length } }, + nodesExplored: explored.length, + }; + }, +}); + +/** + * @tool reflexion + * Self-reflection and iterative improvement via LLM critique loop + */ +export const reflexion = action({ + args: { + task: v.string(), + model: v.string(), + maxIterations: v.optional(v.number()), + improvementThreshold: v.optional(v.number()), + }, + handler: async (ctx, args) => { + // Gate: enforce tier-based Bedrock access for cloud models + const isOllamaModel = args.model.includes(":") && !args.model.includes("."); + let gateResult: { allowed: true; userId: string; tier: string } | undefined; + if (!isOllamaModel) { + const { requireBedrockAccess } = await import("./lib/bedrockGate"); + const gate = await requireBedrockAccess( + ctx, args.model, + async (lookupArgs) => ctx.runQuery(internal.users.getInternal, lookupArgs), + ); + if (!gate.allowed) { throw new Error(gate.reason); } + gateResult = gate; + } + + const maxIterations = args.maxIterations || 3; + const iterationHistory: Array<{ + iteration: number; + solution: string; + critique: string; + improvementScore: number; + }> = []; + let totalInputTokens = 0, totalOutputTokens = 0; + + let currentSolution = ""; + + for (let i = 0; i < maxIterations; i++) { + // Generate or improve solution + const solvePrompt = i === 0 + ? `Solve this task:\n${args.task}\n\nProvide a complete solution.` + : `Previous solution:\n${currentSolution}\n\nPrevious critique:\n${iterationHistory[i - 1].critique}\n\nImprove the solution based on the critique. Provide the improved version.`; + + try { + const solveLlmResult = await invokeLLM(args.model, solvePrompt, { temperature: 0.5, maxTokens: 2048 }); + totalInputTokens += solveLlmResult.tokenUsage?.inputTokens ?? 0; + totalOutputTokens += solveLlmResult.tokenUsage?.outputTokens ?? 0; + currentSolution = solveLlmResult.text; + + // Self-critique + const critiquePrompt = `Critically evaluate this solution to the task "${args.task}":\n\n${solveLlmResult.text}\n\nList specific weaknesses and rate the improvement needed on a scale of 0-1 after "IMPROVEMENT_NEEDED: ".`; + const critiqueLlmResult = await invokeLLM(args.model, critiquePrompt, { temperature: 0.3, maxTokens: 1024 }); + totalInputTokens += critiqueLlmResult.tokenUsage?.inputTokens ?? 0; + totalOutputTokens += critiqueLlmResult.tokenUsage?.outputTokens ?? 0; + + const scoreMatch = critiqueLlmResult.text.match(/IMPROVEMENT_NEEDED:\s*([\d.]+)/i); + const improvementScore = scoreMatch ? parseFloat(scoreMatch[1]) : 0.5; + + iterationHistory.push({ + iteration: i + 1, + solution: solveLlmResult.text, + critique: critiqueLlmResult.text, + improvementScore, + }); + + if (improvementScore < (args.improvementThreshold || 0.1)) { + break; + } + } catch (error: any) { + iterationHistory.push({ + iteration: i + 1, + solution: `Error: ${error.message}`, + critique: "Could not generate critique due to error", + improvementScore: 1, + }); + break; + } + } + + // Meter accumulated token usage + if ((totalInputTokens > 0 || totalOutputTokens > 0) && gateResult) { + await ctx.runMutation(internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateResult.userId as any, + modelId: args.model, + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + }); + } + + return { + finalResult: currentSolution, + iterationHistory, + improvements: iterationHistory.map((h) => h.critique.slice(0, 200)), + iterations: iterationHistory.length, + }; + }, +}); + +/** + * @tool map_reduce + * Parallel processing with aggregation via real LLM calls + */ +export const mapReduce = action({ + args: { + data: v.array(v.any()), + model: v.string(), + mapPrompt: v.string(), + reducePrompt: v.string(), + chunkSize: v.optional(v.number()), + }, + handler: async (ctx, args) => { + // Gate: enforce tier-based Bedrock access for cloud models + const isOllamaModel = args.model.includes(":") && !args.model.includes("."); + let gateResult: { allowed: true; userId: string; tier: string } | undefined; + if (!isOllamaModel) { + const { requireBedrockAccess } = await import("./lib/bedrockGate"); + const gate = await requireBedrockAccess( + ctx, args.model, + async (lookupArgs) => ctx.runQuery(internal.users.getInternal, lookupArgs), + ); + if (!gate.allowed) { throw new Error(gate.reason); } + gateResult = gate; + } + + const chunkSize = args.chunkSize || 5; + let totalInputTokens = 0, totalOutputTokens = 0; + + // Split data into chunks + const chunks: any[][] = []; + for (let i = 0; i < args.data.length; i += chunkSize) { + chunks.push(args.data.slice(i, i + chunkSize)); + } + + // MAP phase: Process chunks in batches to limit concurrency + const MAP_CONCURRENCY = 5; + const mapResults: string[] = []; + for (let i = 0; i < chunks.length; i += MAP_CONCURRENCY) { + const batch = chunks.slice(i, i + MAP_CONCURRENCY); + const batchResults = await Promise.all( + batch.map(async (chunk, batchIdx) => { + const index = i + batchIdx; + const prompt = `${args.mapPrompt}\n\nData chunk ${index + 1}:\n${JSON.stringify(chunk, null, 2)}`; + try { + const llmResult = await invokeLLM(args.model, prompt, { temperature: 0.3, maxTokens: 2048 }); + totalInputTokens += llmResult.tokenUsage?.inputTokens ?? 0; + totalOutputTokens += llmResult.tokenUsage?.outputTokens ?? 0; + return llmResult.text; + } catch (error: any) { + return `Chunk ${index + 1} failed: ${error.message}`; + } + }) + ); + mapResults.push(...batchResults); + } + + // REDUCE phase: Aggregate results with LLM + let finalResult: string; + try { + const reduceInput = mapResults.map((r, i) => `Result ${i + 1}:\n${r}`).join("\n\n"); + const reduceLlmResult = await invokeLLM( + args.model, + `${args.reducePrompt}\n\nIntermediate results:\n${reduceInput}`, + { temperature: 0.2, maxTokens: 4096 } + ); + totalInputTokens += reduceLlmResult.tokenUsage?.inputTokens ?? 0; + totalOutputTokens += reduceLlmResult.tokenUsage?.outputTokens ?? 0; + finalResult = reduceLlmResult.text; + } catch (error: any) { + finalResult = `Reduce phase failed: ${error.message}. Intermediate: ${mapResults.join(" | ")}`; + } + + // Meter accumulated token usage + if ((totalInputTokens > 0 || totalOutputTokens > 0) && gateResult) { + await ctx.runMutation(internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateResult.userId as any, + modelId: args.model, + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + }); + } + + return { + result: finalResult, + intermediateResults: mapResults, + chunksProcessed: chunks.length, + totalItems: args.data.length, + }; + }, +}); + +/** + * @tool parallel_prompts + * Execute multiple prompts in parallel with real LLM calls + */ +export const parallelPrompts = action({ + args: { + prompts: v.array(v.object({ + id: v.string(), + template: v.string(), + priority: v.optional(v.number()), + })), + model: v.string(), + inputData: v.optional(v.record(v.string(), v.any())), + maxParallelism: v.optional(v.number()), + }, + handler: async (ctx, args) => { + // Gate: enforce tier-based Bedrock access for cloud models + const isOllamaModel = args.model.includes(":") && !args.model.includes("."); + let gateResult: { allowed: true; userId: string; tier: string } | undefined; + if (!isOllamaModel) { + const { requireBedrockAccess } = await import("./lib/bedrockGate"); + const gate = await requireBedrockAccess( + ctx, args.model, + async (lookupArgs) => ctx.runQuery(internal.users.getInternal, lookupArgs), + ); + if (!gate.allowed) { throw new Error(gate.reason); } + gateResult = gate; + } + + const startTime = Date.now(); + const maxParallelism = args.maxParallelism || 3; + let totalInputTokens = 0, totalOutputTokens = 0; + + // Sort by priority + const sortedPrompts = [...args.prompts].sort((a, b) => + (b.priority || 0) - (a.priority || 0) + ); + + // Execute in batches with real LLM calls + const results: Record = {}; + const timings: Record = {}; + + for (let i = 0; i < sortedPrompts.length; i += maxParallelism) { + const batch = sortedPrompts.slice(i, i + maxParallelism); + + const batchResults = await Promise.all( + batch.map(async (prompt) => { + const promptStart = Date.now(); + try { + const llmResult = await invokeLLM(args.model, prompt.template, { temperature: 0.7, maxTokens: 2048 }); + totalInputTokens += llmResult.tokenUsage?.inputTokens ?? 0; + totalOutputTokens += llmResult.tokenUsage?.outputTokens ?? 0; + return { id: prompt.id, result: llmResult.text, timing: Date.now() - promptStart }; + } catch (error: any) { + return { id: prompt.id, result: `Error: ${error.message}`, timing: Date.now() - promptStart }; + } + }) + ); + + batchResults.forEach((r) => { + results[r.id] = r.result; + timings[r.id] = r.timing; + }); + } + + // Meter accumulated token usage + if ((totalInputTokens > 0 || totalOutputTokens > 0) && gateResult) { + await ctx.runMutation(internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateResult.userId as any, + modelId: args.model, + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + }); + } + + return { + results, + timings, + totalLatency: Date.now() - startTime, + promptsExecuted: sortedPrompts.length, + }; + }, +}); + +/** + * Generic executor for any registered Strands tool. + * Returns an honest status when the runtime is not connected. + */ +export const executeStrandsTool = action({ + args: { + toolName: v.string(), + params: v.optional(v.record(v.string(), v.any())), + context: v.optional(v.record(v.string(), v.any())), + }, + handler: async (_ctx, args) => { + const normalized = normalizeToolName(args.toolName); + const metadata = findToolMetadata(normalized); + + if (!metadata) { + return { + success: false, + error: `Tool "${args.toolName}" is not registered in the Strands catalog.`, + }; + } + + return { + success: false, + tool: metadata.name, + displayName: metadata.displayName, + category: metadata.category, + description: metadata.description, + capabilities: metadata.capabilities, + echo: args.params || {}, + context: args.context || {}, + error: "Strands runtime is not connected. Install the strands-agents Python package and configure the MCP bridge to enable live tool execution.", + requirements: { + basePackage: metadata.basePip, + extras: metadata.extrasPip, + additionalPackages: metadata.additionalPipPackages || [], + requiredEnv: metadata.requiresEnvVars || [], + }, + timestamp: Date.now(), + }; + }, +}); diff --git a/convex/unifiedAgentExecution.ts b/convex/unifiedAgentExecution.ts new file mode 100644 index 0000000..4b5ae9b --- /dev/null +++ b/convex/unifiedAgentExecution.ts @@ -0,0 +1,645 @@ +/** + * Unified Agent Execution + * + * Single execution engine for ALL modalities: + * - Text → Bedrock Claude + * - Image → Bedrock Nova Canvas / Titan / SDXL + * - Video → Bedrock Nova Reel + * - Speech → Amazon Polly + * + * Automatically switches models based on complexity. + */ + +import { action } from "./_generated/server"; +import { v } from "convex/values"; +import { api, internal } from "./_generated/api"; +import type { ActionCtx } from "./_generated/server"; +import type { Doc, Id } from "./_generated/dataModel"; +import { + decideUnifiedModelSwitch, + getModelExecutionConfig, + type Modality, + type UnifiedModelDecision, +} from "./lib/unifiedModalitySwitching"; + +type AgentDoc = Doc<"agents">; + +type ConversationMessage = { + role: "user" | "assistant" | "system" | "tool"; + content: string; + timestamp: number; + reasoning?: string; + toolCalls?: unknown; + mediaUrls?: string[]; // For images, videos, audio +}; + +interface UnifiedExecutionResult { + success: boolean; + modality: Modality; + content?: string; + mediaUrls?: string[]; // S3 URLs for generated media + error?: string; + reasoning?: string; + metadata?: { + model: string; + modelProvider: string; + executionMethod: string; + modalityDecision?: UnifiedModelDecision; + originalModel?: string; + }; +} + +/** + * Unified agent execution with automatic modality and model switching + */ +export const executeUnifiedAgent = action({ + args: { + agentId: v.id("agents"), + conversationId: v.optional(v.id("interleavedConversations")), + message: v.string(), + enableDynamicSwitching: v.optional(v.boolean()), + forceModality: v.optional( + v.union( + v.literal("text"), + v.literal("image"), + v.literal("video"), + v.literal("speech"), + v.literal("multimodal") + ) + ), + }, + handler: async (ctx, args): Promise => { + try { + // Get agent + const agent = (await ctx.runQuery(api.agents.get, { + id: args.agentId, + })) as AgentDoc | null; + + if (!agent) { + throw new Error("Agent not found"); + } + + // Get conversation history + const history: ConversationMessage[] = args.conversationId + ? ((await ctx.runQuery(internal.interleavedReasoning.getConversationHistory, { + conversationId: args.conversationId, + windowSize: 10, + })) as ConversationMessage[]) + : []; + + // Get user tier + const user = await ctx.runQuery(internal.users.getInternal, { + id: agent.createdBy, + }); + const userTier = (user?.tier as "freemium" | "personal" | "enterprise") || "freemium"; + + // Gate: enforce tier-based Bedrock access + const isBedrock = agent.deploymentType === "bedrock" + || ( !agent.deploymentType && /^(us\.|eu\.|apac\.|global\.)?(anthropic|amazon|meta|mistral|cohere|ai21|deepseek|moonshot)\./.test( agent.model ) ); + if ( isBedrock ) { + const { requireBedrockAccessForUser } = await import( "./lib/bedrockGate" ); + const gateResult = await requireBedrockAccessForUser( user, agent.model ); + if ( !gateResult.allowed ) { + return { + success: false, + modality: "text", + content: "", + error: gateResult.reason, + } as UnifiedExecutionResult; + } + } + + // Make modality and model switching decision + const decision = decideUnifiedModelSwitch( + args.message, + history.map((m) => ({ role: m.role, content: m.content })), + agent, + { + userTier, + preferQuality: agent.modelSwitchingConfig?.preferCapability, + preferCost: agent.modelSwitchingConfig?.preferCost, + preferSpeed: agent.modelSwitchingConfig?.preferSpeed, + explicitModality: args.forceModality as Modality, + } + ); + + console.log(`[UnifiedExecution] ${decision.reasoning}`); + console.log(`[UnifiedExecution] Selected: ${decision.selectedModel.name}`); + console.log(`[UnifiedExecution] Estimated cost: $${decision.estimatedCost.toFixed(4)}`); + + // Execute based on modality + let result: UnifiedExecutionResult; + + switch (decision.modality) { + case "text": + result = await executeText(ctx, agent, args.message, history, decision); + break; + + case "image": + result = await executeImage(ctx, agent, args.message, decision); + break; + + case "video": + result = await executeVideo(ctx, agent, args.message, decision); + break; + + case "speech": + result = await executeSpeech(ctx, agent, args.message, decision); + break; + + case "multimodal": + result = await executeMultimodal(ctx, agent, args.message, history, decision); + break; + + default: + throw new Error(`Unknown modality: ${decision.modality}`); + } + + // Add decision metadata + if (result.metadata) { + result.metadata.modalityDecision = decision; + result.metadata.originalModel = agent.model; + } + + return result; + } catch (error: unknown) { + console.error("Unified execution error:", error); + const message = error instanceof Error ? error.message : "Execution failed"; + return { + success: false, + modality: "text", + error: message, + }; + } + }, +}); + +/** + * Execute TEXT modality + */ +async function executeText( + ctx: ActionCtx, + agent: AgentDoc, + message: string, + history: ConversationMessage[], + decision: UnifiedModelDecision +): Promise { + const { BedrockRuntimeClient, InvokeModelCommand } = await import( + "@aws-sdk/client-bedrock-runtime" + ); + + const client = new BedrockRuntimeClient({ + region: process.env.AWS_REGION || "us-east-1", + credentials: { + accessKeyId: process.env.AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, + }, + }); + + const messages = [ + ...history.map((m) => ({ + role: m.role, + content: [{ text: m.content }], + })), + { + role: "user", + content: [{ text: message }], + }, + ]; + + const config = getModelExecutionConfig(decision); + + const payload = { + anthropic_version: "bedrock-2023-05-31", + max_tokens: config.maxTokens, + system: agent.systemPrompt, + messages, + temperature: config.temperature, + thinking: config.thinking, + }; + + const command = new InvokeModelCommand({ + modelId: config.modelId, + contentType: "application/json", + accept: "application/json", + body: JSON.stringify(payload), + }); + + const response = await client.send(command); + const responseBody = JSON.parse(new TextDecoder().decode(response.body)); + + let content = ""; + let reasoning = ""; + + for (const block of responseBody.content || []) { + if (block.type === "text") { + content += block.text; + } else if (block.type === "thinking") { + reasoning += block.thinking; + } + } + + // Token extraction + metering + const { extractTokenUsage, estimateTokenUsage } = await import( "./lib/tokenBilling" ); + let tokenUsage = extractTokenUsage( responseBody, config.modelId ); + if ( tokenUsage.totalTokens === 0 ) { + tokenUsage = estimateTokenUsage( message, content ); + } + if ( tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0 ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: agent.createdBy as any, + modelId: config.modelId, + inputTokens: tokenUsage.inputTokens, + outputTokens: tokenUsage.outputTokens, + } ); + } + + return { + success: true, + modality: "text", + content: content.trim(), + reasoning: reasoning.trim() || undefined, + metadata: { + model: config.modelId, + modelProvider: "bedrock", + executionMethod: "direct-bedrock-api", + }, + }; +} + +/** + * Execute IMAGE modality + */ +async function executeImage( + ctx: ActionCtx, + agent: AgentDoc, + message: string, + decision: UnifiedModelDecision +): Promise { + const { BedrockRuntimeClient, InvokeModelCommand } = await import( + "@aws-sdk/client-bedrock-runtime" + ); + + const client = new BedrockRuntimeClient({ + region: process.env.AWS_REGION || "us-east-1", + credentials: { + accessKeyId: process.env.AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, + }, + }); + + const config = getModelExecutionConfig(decision); + + const payload = { + taskType: config.taskType, + textToImageParams: { + text: message, + }, + imageGenerationConfig: config.imageGenerationConfig, + }; + + const command = new InvokeModelCommand({ + modelId: config.modelId, + contentType: "application/json", + accept: "application/json", + body: JSON.stringify(payload), + }); + + const response = await client.send(command); + const responseBody = JSON.parse(new TextDecoder().decode(response.body)); + + // Images come back as base64 + const images: string[] = responseBody.images || []; + + // Upload to S3 + const s3Keys = await uploadImagesToS3(ctx, images, agent._id); + const imageUrls = await Promise.all( + s3Keys.map((key) => getSignedS3Url(ctx, key)) + ); + + return { + success: true, + modality: "image", + content: `Generated ${images.length} image(s) using ${decision.selectedModel.name}`, + mediaUrls: imageUrls, + metadata: { + model: config.modelId, + modelProvider: "bedrock", + executionMethod: "image-generation", + }, + }; +} + +/** + * Execute VIDEO modality + */ +async function executeVideo( + ctx: ActionCtx, + agent: AgentDoc, + message: string, + decision: UnifiedModelDecision +): Promise { + const { BedrockRuntimeClient, StartAsyncInvokeCommand, GetAsyncInvokeCommand } = await import( + "@aws-sdk/client-bedrock-runtime" + ); + + const region = process.env.AWS_REGION || "us-east-1"; + const s3Bucket = process.env.AWS_S3_BUCKET; + if (!s3Bucket) { + throw new Error("AWS_S3_BUCKET environment variable is required for video generation"); + } + if (!process.env.AWS_ACCESS_KEY_ID || !process.env.AWS_SECRET_ACCESS_KEY) { + throw new Error("AWS credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) are required for video generation"); + } + + const client = new BedrockRuntimeClient({ + region, + credentials: { + accessKeyId: process.env.AWS_ACCESS_KEY_ID, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY, + }, + }); + + const config = getModelExecutionConfig(decision); + + const payload = { + taskType: config.taskType, + textToVideoParams: { + text: message, + }, + videoGenerationConfig: config.videoGenerationConfig, + }; + + const s3OutputPrefix = `video-outputs/${agent._id}/${Date.now()}`; + + // Nova Reel requires async invocation pattern + const startCommand = new StartAsyncInvokeCommand({ + modelId: config.modelId, + modelInput: payload, + outputDataConfig: { + s3OutputDataConfig: { + s3Uri: `s3://${s3Bucket}/${s3OutputPrefix}`, + }, + }, + }); + + const startResponse = await client.send(startCommand); + const invocationArn = startResponse.invocationArn; + + if (!invocationArn) { + throw new Error("Failed to start async video generation: no invocationArn returned"); + } + + // Poll for completion (max ~5 minutes) + const maxAttempts = 60; + const pollIntervalMs = 5000; + let status = "InProgress"; + + for (let attempt = 0; attempt < maxAttempts; attempt++) { + await new Promise((resolve) => setTimeout(resolve, pollIntervalMs)); + + const getCommand = new GetAsyncInvokeCommand({ invocationArn }); + const getResponse = await client.send(getCommand); + status = getResponse.status || "Unknown"; + + if (status === "Completed") { + break; + } else if (status === "Failed") { + throw new Error(`Video generation failed: ${getResponse.failureMessage || "unknown error"}`); + } + // Continue polling for "InProgress" + } + + if (status !== "Completed") { + throw new Error(`Video generation timed out after ${maxAttempts * pollIntervalMs / 1000}s`); + } + + // Read generated video from S3 output location + const s3Key = `${s3OutputPrefix}/output.mp4`; + const videoUrl = await getSignedS3Url(ctx, s3Key); + + return { + success: true, + modality: "video", + content: `Generated video using ${decision.selectedModel.name} (${config.videoGenerationConfig.durationSeconds}s)`, + mediaUrls: [videoUrl], + metadata: { + model: config.modelId, + modelProvider: "bedrock", + executionMethod: "video-generation", + }, + }; +} + +/** + * Execute SPEECH modality + */ +async function executeSpeech( + ctx: ActionCtx, + agent: AgentDoc, + message: string, + decision: UnifiedModelDecision +): Promise { + const { PollyClient, SynthesizeSpeechCommand } = await import( + "@aws-sdk/client-polly" + ); + + const client = new PollyClient({ + region: process.env.AWS_REGION || "us-east-1", + credentials: { + accessKeyId: process.env.AWS_ACCESS_KEY_ID!, + secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!, + }, + }); + + const config = getModelExecutionConfig(decision); + + const command = new SynthesizeSpeechCommand({ + Engine: config.engine, + VoiceId: config.voiceId, + LanguageCode: config.languageCode, + Text: message, + OutputFormat: "mp3", + }); + + const response = await client.send(command); + + // Upload to S3 + const audioStream = response.AudioStream; + if (!audioStream) { + throw new Error("No audio stream returned"); + } + + const audioBuffer = await streamToBuffer(audioStream); + const s3Key = await uploadAudioToS3(ctx, audioBuffer, agent._id); + const audioUrl = await getSignedS3Url(ctx, s3Key); + + return { + success: true, + modality: "speech", + content: `Generated speech using Amazon Polly (${config.engine})`, + mediaUrls: [audioUrl], + metadata: { + model: `polly-${config.engine}`, + modelProvider: "aws-polly", + executionMethod: "speech-synthesis", + }, + }; +} + +/** + * Execute MULTIMODAL (combination) + */ +async function executeMultimodal( + ctx: ActionCtx, + agent: AgentDoc, + message: string, + history: ConversationMessage[], + decision: UnifiedModelDecision +): Promise { + // Use Claude to plan the multimodal response + const textDecision = { ...decision, modality: "text" as Modality }; + const planningResult = await executeText(ctx, agent, message, history, textDecision); + + // Extract media generation requests from Claude's response + // (This would be enhanced with tool calling in production) + const mediaUrls: string[] = []; + + // For now, return the text response + return { + success: true, + modality: "multimodal", + content: planningResult.content, + mediaUrls, + reasoning: planningResult.reasoning, + metadata: planningResult.metadata, + }; +} + +/** + * Helper: Upload images to S3 + */ +async function uploadImagesToS3( + _ctx: ActionCtx, + imagesBase64: string[], + agentId: Id<"agents"> +): Promise { + const { S3Client, PutObjectCommand } = await import("@aws-sdk/client-s3"); + + const client = new S3Client({ + region: process.env.AWS_REGION || "us-east-1", + }); + + const bucket = process.env.AWS_S3_BUCKET!; + const keys: string[] = []; + + for (let i = 0; i < imagesBase64.length; i++) { + const buffer = Buffer.from(imagesBase64[i], "base64"); + const key = `agents/${agentId}/images/${Date.now()}_${i}.png`; + + await client.send( + new PutObjectCommand({ + Bucket: bucket, + Key: key, + Body: buffer, + ContentType: "image/png", + }) + ); + + keys.push(key); + } + + return keys; +} + +/** + * Helper: Upload video to S3 + */ +async function uploadVideoToS3( + _ctx: ActionCtx, + videoBase64: string, + agentId: Id<"agents"> +): Promise { + const { S3Client, PutObjectCommand } = await import("@aws-sdk/client-s3"); + + const client = new S3Client({ + region: process.env.AWS_REGION || "us-east-1", + }); + + const bucket = process.env.AWS_S3_BUCKET!; + const buffer = Buffer.from(videoBase64, "base64"); + const key = `agents/${agentId}/videos/${Date.now()}.mp4`; + + await client.send( + new PutObjectCommand({ + Bucket: bucket, + Key: key, + Body: buffer, + ContentType: "video/mp4", + }) + ); + + return key; +} + +/** + * Helper: Upload audio to S3 + */ +async function uploadAudioToS3( + _ctx: ActionCtx, + audioBuffer: Buffer, + agentId: Id<"agents"> +): Promise { + const { S3Client, PutObjectCommand } = await import("@aws-sdk/client-s3"); + + const client = new S3Client({ + region: process.env.AWS_REGION || "us-east-1", + }); + + const bucket = process.env.AWS_S3_BUCKET!; + const key = `agents/${agentId}/audio/${Date.now()}.mp3`; + + await client.send( + new PutObjectCommand({ + Bucket: bucket, + Key: key, + Body: audioBuffer, + ContentType: "audio/mpeg", + }) + ); + + return key; +} + +/** + * Helper: Get signed S3 URL + */ +async function getSignedS3Url(_ctx: ActionCtx, key: string): Promise { + const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3"); + const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner"); + + const client = new S3Client({ + region: process.env.AWS_REGION || "us-east-1", + }); + + const bucket = process.env.AWS_S3_BUCKET!; + + const command = new GetObjectCommand({ + Bucket: bucket, + Key: key, + }); + + // URL expires in 1 hour + return await getSignedUrl(client, command, { expiresIn: 3600 }); +} + +/** + * Helper: Stream to buffer + */ +async function streamToBuffer(stream: any): Promise { + const chunks: Uint8Array[] = []; + for await (const chunk of stream) { + chunks.push(chunk); + } + return Buffer.concat(chunks); +} diff --git a/convex/unifiedAgentExecutionTest.ts b/convex/unifiedAgentExecutionTest.ts new file mode 100644 index 0000000..75601ea --- /dev/null +++ b/convex/unifiedAgentExecutionTest.ts @@ -0,0 +1,408 @@ +/** + * Test Suite for Unified Agent Execution + * + * Tests all modality paths and model switching decisions + */ + +import { action } from "./_generated/server"; +import { v } from "convex/values"; +import { api } from "./_generated/api"; +import { + decideUnifiedModelSwitch, + detectModality, + analyzeUnifiedComplexity, + calculateUnifiedComplexityScore, + type Modality, +} from "./lib/unifiedModalitySwitching"; + +/** + * Test modality detection + */ +export const testModalityDetection = action({ + args: {}, + handler: async (_ctx): Promise<{ results: Array<{ message: string; detected: Modality }> }> => { + const testCases = [ + // Text + "What is the capital of France?", + "Explain quantum computing step by step", + + // Image + "Generate an image of a sunset over mountains", + "Create a professional banner for my website", + + // Video + "Create a video showing product demo", + "Generate an explainer video about AI", + + // Speech + "Convert this to voice: Hello world", + "Create a voiceover for my presentation", + + // Multimodal + "Create a video with narration explaining machine learning", + "Generate an image and describe it in voice", + ]; + + const results = testCases.map((message) => ({ + message, + detected: detectModality(message), + })); + + console.log("\n=== MODALITY DETECTION TEST ==="); + results.forEach(({ message, detected }) => { + console.log(`Message: "${message}"`); + console.log(`Detected: ${detected}\n`); + }); + + return { results }; + }, +}); + +/** + * Test complexity scoring + */ +export const testComplexityScoring = action({ + args: {}, + handler: async (_ctx): Promise<{ results: Array<{ message: string; modality: Modality; score: number }> }> => { + const testCases: Array<{ message: string; modality: Modality }> = [ + // Simple text + { message: "Hello", modality: "text" }, + + // Moderate text + { message: "Can you explain how neural networks work?", modality: "text" }, + + // Complex text + { message: "Please explain step by step, with detailed reasoning, how transformer architectures work including attention mechanisms, positional encoding, and multi-head attention. Include code examples.", modality: "text" }, + + // Simple image + { message: "Generate a simple logo", modality: "image" }, + + // Complex image + { message: "Create a photorealistic high-resolution 4K banner with intricate details showing a futuristic cityscape at sunset", modality: "image" }, + + // Simple video + { message: "Create a 5 second video clip", modality: "video" }, + + // Complex video + { message: "Generate a 60-second premium quality cinematic video with smooth camera movements showing a product showcase", modality: "video" }, + ]; + + const results = testCases.map(({ message, modality }) => { + const signals = analyzeUnifiedComplexity(message, modality, []); + const score = calculateUnifiedComplexityScore(signals, modality); + return { message, modality, score }; + }); + + console.log("\n=== COMPLEXITY SCORING TEST ==="); + results.forEach(({ message, modality, score }) => { + console.log(`Message: "${message}"`); + console.log(`Modality: ${modality}`); + console.log(`Complexity Score: ${score}/100\n`); + }); + + return { results }; + }, +}); + +/** + * Test model switching decisions + */ +export const testModelSwitching = action({ + args: { + agentId: v.id("agents"), + }, + handler: async (ctx, args) => { + const agent = await ctx.runQuery(api.agents.get, { id: args.agentId }); + if (!agent) { + throw new Error("Agent not found"); + } + + const testCases = [ + // Text: simple → Haiku + { + message: "Hello", + history: [], + expectedTier: "haiku", + }, + + // Text: moderate → Sonnet + { + message: "Explain how database indexing works with examples", + history: [], + expectedTier: "sonnet", + }, + + // Text: complex → Opus + { + message: "Think step by step and explain in detail how distributed consensus algorithms work, comparing Paxos and Raft with code examples", + history: [], + expectedTier: "opus", + }, + + // Image: simple → Titan + { + message: "Generate a simple icon", + history: [], + expectedTier: "titan", + }, + + // Image: complex → Nova Canvas + { + message: "Create a photorealistic 4K high-resolution professional banner with intricate details", + history: [], + expectedTier: "novaCanvas", + }, + + // Video: standard → Nova Reel Standard + { + message: "Create a 5 second video clip", + history: [], + expectedTier: "novaReelStandard", + }, + + // Video: premium → Nova Reel Premium + { + message: "Generate a 60-second premium cinematic video with smooth professional camera work", + history: [], + expectedTier: "novaReelPremium", + }, + ]; + + const results = []; + + console.log("\n=== MODEL SWITCHING TEST ==="); + + for (const testCase of testCases) { + const decision = decideUnifiedModelSwitch( + testCase.message, + testCase.history, + agent as any, + { userTier: "personal" } + ); + + const result = { + message: testCase.message, + modality: decision.modality, + selectedModel: decision.selectedModel.name, + complexityScore: decision.complexityScore, + estimatedCost: decision.estimatedCost, + reasoning: decision.reasoning, + expectedTier: testCase.expectedTier, + match: decision.selectedModel.modelId.includes(testCase.expectedTier) || + decision.selectedModel.name.toLowerCase().includes(testCase.expectedTier.toLowerCase()), + }; + + results.push(result); + + console.log(`\nMessage: "${testCase.message}"`); + console.log(`Modality: ${decision.modality}`); + console.log(`Selected: ${decision.selectedModel.name}`); + console.log(`Complexity: ${decision.complexityScore}/100`); + console.log(`Cost: $${decision.estimatedCost.toFixed(4)}`); + console.log(`Reasoning: ${decision.reasoning}`); + console.log(`Expected: ${testCase.expectedTier}`); + console.log(`Match: ${result.match ? "✓" : "✗"}`); + } + + return { results }; + }, +}); + +/** + * Test tier-based routing + */ +export const testTierBasedRouting = action({ + args: { + agentId: v.id("agents"), + }, + handler: async (ctx, args) => { + const agent = await ctx.runQuery(api.agents.get, { id: args.agentId }); + if (!agent) { + throw new Error("Agent not found"); + } + + const message = "Explain quantum computing step by step with detailed reasoning"; + const history: Array<{ role: string; content: string }> = []; + + const tiers: Array<"freemium" | "personal" | "enterprise"> = ["freemium", "personal", "enterprise"]; + const results = []; + + console.log("\n=== TIER-BASED ROUTING TEST ==="); + console.log(`Message: "${message}"\n`); + + for (const tier of tiers) { + const decision = decideUnifiedModelSwitch( + message, + history, + agent as any, + { userTier: tier } + ); + + const result = { + tier, + selectedModel: decision.selectedModel.name, + complexityScore: decision.complexityScore, + estimatedCost: decision.estimatedCost, + }; + + results.push(result); + + console.log(`Tier: ${tier}`); + console.log(`Selected: ${decision.selectedModel.name}`); + console.log(`Cost: $${decision.estimatedCost.toFixed(4)}\n`); + } + + return { results }; + }, +}); + +/** + * Test conversation history impact + */ +export const testConversationHistory = action({ + args: { + agentId: v.id("agents"), + }, + handler: async (ctx, args) => { + const agent = await ctx.runQuery(api.agents.get, { id: args.agentId }); + if (!agent) { + throw new Error("Agent not found"); + } + + const message = "What about databases?"; + + const histories = [ + // No history + [], + + // Short history + [ + { role: "user", content: "Hello" }, + { role: "assistant", content: "Hi! How can I help?" }, + ], + + // Long history with tool calls + [ + { role: "user", content: "Explain indexing" }, + { role: "assistant", content: "Indexing is... tool_use..." }, + { role: "user", content: "Can you show examples?" }, + { role: "assistant", content: "Here are examples... tool_use..." }, + { role: "user", content: "What about performance?" }, + { role: "assistant", content: "Performance considerations..." }, + ], + + // History with failures + [ + { role: "user", content: "Solve this problem" }, + { role: "assistant", content: "I encountered an error..." }, + { role: "user", content: "Try again" }, + { role: "assistant", content: "Still failed..." }, + ], + ]; + + const results = []; + + console.log("\n=== CONVERSATION HISTORY TEST ==="); + console.log(`Message: "${message}"\n`); + + for (let i = 0; i < histories.length; i++) { + const decision = decideUnifiedModelSwitch( + message, + histories[i], + agent as any, + { userTier: "personal" } + ); + + const result = { + historyLength: histories[i].length, + selectedModel: decision.selectedModel.name, + complexityScore: decision.complexityScore, + reasoning: decision.reasoning, + }; + + results.push(result); + + console.log(`History Length: ${histories[i].length}`); + console.log(`Selected: ${decision.selectedModel.name}`); + console.log(`Complexity: ${decision.complexityScore}/100`); + console.log(`Reasoning: ${decision.reasoning}\n`); + } + + return { results }; + }, +}); + +/** + * Run all tests + * + * Note: To run all tests, call each test action individually from the UI: + * 1. testModalityDetection + * 2. testComplexityScoring + * 3. testModelSwitching + * 4. testTierBasedRouting + * 5. testConversationHistory + * + * This master test combines modality detection and complexity scoring only (no agent needed). + */ +export const runBasicTests = action({ + args: {}, + handler: async (_ctx) => { + console.log("\n========================================"); + console.log("UNIFIED MODALITY SWITCHING - BASIC TESTS"); + console.log("========================================\n"); + + // Test 1: Modality Detection + console.log("\n=== TEST 1: MODALITY DETECTION ===\n"); + const modalityTests = [ + { message: "What is the capital of France?", expected: "text" }, + { message: "Generate an image of a sunset", expected: "image" }, + { message: "Create a video showing product demo", expected: "video" }, + { message: "Convert this to voice: Hello world", expected: "speech" }, + { message: "Create a video with narration", expected: "multimodal" }, + ]; + + const modalityResults = modalityTests.map(({ message, expected }) => { + const detected = detectModality(message); + const pass = detected === expected; + console.log(`${pass ? "✓" : "✗"} "${message}"`); + console.log(` Expected: ${expected}, Got: ${detected}\n`); + return { message, expected, detected, pass }; + }); + + // Test 2: Complexity Scoring + console.log("\n=== TEST 2: COMPLEXITY SCORING ===\n"); + const complexityTests = [ + { message: "Hello", modality: "text" as Modality, expectedRange: [0, 30] }, + { message: "Explain neural networks step by step", modality: "text" as Modality, expectedRange: [30, 60] }, + { message: "Think step by step and explain in extreme detail how transformers work with code examples", modality: "text" as Modality, expectedRange: [60, 100] }, + ]; + + const complexityResults = complexityTests.map(({ message, modality, expectedRange }) => { + const signals = analyzeUnifiedComplexity(message, modality, []); + const score = calculateUnifiedComplexityScore(signals, modality); + const inRange = score >= expectedRange[0] && score <= expectedRange[1]; + console.log(`${inRange ? "✓" : "✗"} "${message}"`); + console.log(` Score: ${score}/100, Expected: ${expectedRange[0]}-${expectedRange[1]}\n`); + return { message, score, expectedRange, inRange }; + }); + + const allPassed = + modalityResults.every((r) => r.pass) && + complexityResults.every((r) => r.inRange); + + console.log("\n========================================"); + console.log(`BASIC TESTS ${allPassed ? "PASSED ✓" : "FAILED ✗"}`); + console.log("========================================\n"); + + return { + summary: { + totalTests: modalityResults.length + complexityResults.length, + passed: modalityResults.filter((r) => r.pass).length + complexityResults.filter((r) => r.inRange).length, + failed: modalityResults.filter((r) => !r.pass).length + complexityResults.filter((r) => !r.inRange).length, + allPassed, + }, + modalityDetection: modalityResults, + complexityScoring: complexityResults, + }; + }, +}); diff --git a/convex/unifiedUserIdentity.ts b/convex/unifiedUserIdentity.ts new file mode 100644 index 0000000..535c6bf --- /dev/null +++ b/convex/unifiedUserIdentity.ts @@ -0,0 +1,174 @@ +/** + * Unified User Identity System + * + * Handles both authenticated and anonymous users with a single identity. + * Prevents multiple accounts for the same user across different auth methods. + */ + +import { mutation, query } from "./_generated/server"; +import { v } from "convex/values"; +import { getAuthUserId } from "@convex-dev/auth/server"; + +/** + * Get or create unified user identity + * Returns same userId for: + * - Logged in users (by auth) + * - Anonymous users (by browser fingerprint/device ID) + * - Prevents duplicate accounts + */ +export const getUnifiedUserId = mutation({ + args: { + deviceId: v.optional(v.string()), // Browser fingerprint for anonymous + email: v.optional(v.string()), // If signing in, check for existing accounts + }, + handler: async (ctx, args) => { + // Try to get authenticated user + const authUserId = await getAuthUserId(ctx); + + if (authUserId) { + // User is logged in - use their auth ID + const user = await ctx.db.get(authUserId); + + // If they were previously anonymous with this device, merge accounts + // Security: Only merge if the authenticated user's record already has this deviceId + // to prevent account takeover via arbitrary deviceId submission + if (args.deviceId && user && user.deviceId === args.deviceId) { + const anonymousUser = await ctx.db + .query("users") + .withIndex("by_device_id", (q) => q.eq("deviceId", args.deviceId)) + .first(); + + if (anonymousUser && anonymousUser._id !== authUserId) { + // Merge anonymous data into authenticated account + await mergeAnonymousUser(ctx, anonymousUser._id, authUserId); + } + } + + return { + userId: authUserId, + isAnonymous: false, + isNewUser: !user, + }; + } + + // User is anonymous - find or create by device ID + if (args.deviceId) { + const existingAnonymous = await ctx.db + .query("users") + .withIndex("by_device_id", (q) => q.eq("deviceId", args.deviceId)) + .first(); + + if (existingAnonymous) { + return { + userId: existingAnonymous._id, + isAnonymous: true, + isNewUser: false, + }; + } + + // Create new anonymous user + const anonymousUserId = await ctx.db.insert("users", { + deviceId: args.deviceId, + isAnonymous: true, + tier: "freemium", + executionsThisMonth: 0, + createdAt: Date.now(), + }); + + return { + userId: anonymousUserId, + isAnonymous: true, + isNewUser: true, + }; + } + + throw new Error("Must provide either authentication or deviceId"); + }, +}); + +/** + * Merge anonymous user data into authenticated account + */ +async function mergeAnonymousUser(ctx: any, anonymousId: any, authenticatedId: any) { + // Transfer agents + const agents = await ctx.db + .query("agents") + .withIndex("by_user", (q: any) => q.eq("createdBy", anonymousId)) + .collect(); + + for (const agent of agents) { + await ctx.db.patch(agent._id, { createdBy: authenticatedId }); + } + + // Transfer workflows + const workflows = await ctx.db + .query("workflows") + .withIndex("by_user", (q: any) => q.eq("userId", anonymousId)) + .collect(); + + for (const workflow of workflows) { + await ctx.db.patch(workflow._id, { userId: authenticatedId }); + } + + // Transfer conversations + const conversations = await ctx.db + .query("interleavedConversations") + .withIndex("by_user", (q: any) => q.eq("userId", anonymousId)) + .collect(); + + for (const conv of conversations) { + await ctx.db.patch(conv._id, { userId: authenticatedId }); + } + + // Mark anonymous user as merged (don't delete, keep for audit) + await ctx.db.patch(anonymousId, { + mergedInto: authenticatedId, + mergedAt: Date.now(), + }); +} + +/** + * Check if user has access to a specific agent + * (for unified access control across all builders) + */ +export const canAccessAgent = query({ + args: { + agentId: v.id("agents"), + deviceId: v.optional(v.string()), + }, + handler: async (ctx, args) => { + const authUserId = await getAuthUserId(ctx); + + // Get user identity + let userId = authUserId; + if (!userId && args.deviceId) { + const anonymousUser = await ctx.db + .query("users") + .withIndex("by_device_id", (q: any) => q.eq("deviceId", args.deviceId)) + .first(); + userId = anonymousUser?._id || null; + } + + if (!userId) { + return { canAccess: false, reason: "No user identity" }; + } + + // Get agent + const agent = await ctx.db.get(args.agentId); + if (!agent) { + return { canAccess: false, reason: "Agent not found" }; + } + + // Check ownership + if (agent.createdBy === userId) { + return { canAccess: true, reason: "Owner" }; + } + + // Check if public + if (agent.isPublic) { + return { canAccess: true, reason: "Public" }; + } + + return { canAccess: false, reason: "Not authorized" }; + }, +}); diff --git a/convex/users.ts b/convex/users.ts new file mode 100644 index 0000000..0199077 --- /dev/null +++ b/convex/users.ts @@ -0,0 +1,47 @@ +/** + * User Management API + */ + +import { query, internalQuery } from "./_generated/server"; +import { v } from "convex/values"; +import { getAuthUserId } from "@convex-dev/auth/server"; + +/** + * Create user role types + */ +export enum UserRole { + ADMIN = "admin", + USER = "user", + PAID = "paid", + GUEST = "guest", + ENTERPRISE = "enterprise", +} +/** + * Internal query to get user by ID (no auth required) + * Used by system actions like test execution + */ +export const getInternal = internalQuery( { + args: { + id: v.id( "users" ), + }, + handler: async ( ctx, args ) => { + return await ctx.db.get( args.id ); + }, +} ); + +/** + * Get current user profile + */ +export const getCurrentUser = query( { + args: {}, + handler: async ( ctx ) => { + const userId = await getAuthUserId( ctx ); + if ( !userId ) { + return null; + } + + // Get user by auth-resolved Convex document ID + const user = await ctx.db.get( userId ); + return user; + }, +} ); diff --git a/convex/workflowExecutor.ts b/convex/workflowExecutor.ts new file mode 100644 index 0000000..a0cb5aa --- /dev/null +++ b/convex/workflowExecutor.ts @@ -0,0 +1,787 @@ +/** + * Real Workflow Executor - FUNCTIONAL IMPLEMENTATION + * + * Executes visual workflow graphs with actual API calls: + * - Bedrock Converse API for AWS models + * - Ollama API for local models + * - MCP tools execution + * - Internal @tool execution + * - Message composition from atomic prompt nodes + */ + +"use node"; + +import { v } from "convex/values"; +import { action } from "./_generated/server"; +import { api, internal } from "./_generated/api"; +import type { WorkflowNode, WorkflowEdge } from "../src/types/workflowNodes"; +import { composeWorkflow } from "../src/engine/messageComposer"; +import { executeComposedMessages } from "./lib/messageExecutor"; + +async function getUserScope( ctx: any ): Promise { + const identity = await ctx.auth.getUserIdentity(); + if ( !identity ) { + throw new Error( "Authentication required to execute workflows." ); + } + + // subject and tokenIdentifier are always unique per user in Convex auth. + // email is a reasonable fallback. Do NOT use identity.provider alone — + // it is just the provider name (e.g., "github") and is the same for all users. + const scope = + identity.subject || + identity.tokenIdentifier || + identity.email; + + if ( !scope ) { + throw new Error( "Unable to resolve user identity." ); + } + + return scope; +} + +function detectDependencyCycle( + nodes: WorkflowNode[], + dependencies: Map +): string[] | null { + const state = new Map(); + const stack: string[] = []; + + const visit = ( nodeId: string ): string[] | null => { + const currentState = state.get( nodeId ); + if ( currentState === "visiting" ) { + const cycleStart = stack.indexOf( nodeId ); + const cyclePath = cycleStart >= 0 ? stack.slice( cycleStart ) : [nodeId]; + return [...cyclePath, nodeId]; + } + if ( currentState === "visited" ) { + return null; + } + + state.set( nodeId, "visiting" ); + stack.push( nodeId ); + for ( const depId of dependencies.get( nodeId ) || [] ) { + const cycle = visit( depId ); + if ( cycle ) { + return cycle; + } + } + stack.pop(); + state.set( nodeId, "visited" ); + return null; + }; + + for ( const node of nodes ) { + const cycle = visit( node.id ); + if ( cycle ) { + return cycle; + } + } + + return null; +} + +export const executeWorkflow = action( { + args: { + workflowId: v.id( "workflows" ), + input: v.any(), + runtimeInputs: v.optional( v.any() ), + }, + handler: async ( ctx, { workflowId, input, runtimeInputs } ) => { + const startTime = Date.now(); + + // 1. QUERY ACTUAL WORKFLOW FROM DATABASE + const workflow = await ctx.runQuery( internal.workflows.getInternal, { workflowId } ); + if ( !workflow ) { + throw new Error( `Workflow ${workflowId} not found` ); + } + + // Enforce workflow ownership before execution. + const userScope = await getUserScope( ctx ); + if ( workflow.userId !== userScope ) { + throw new Error( "Workflow not found for current user" ); + } + + // DB stores loosely typed data; validated at save time via sanitizeNode + const nodes = workflow.nodes as unknown as WorkflowNode[]; + const edges = workflow.edges as unknown as WorkflowEdge[]; + + if ( !nodes.length ) { + throw new Error( "Workflow has no nodes" ); + } + + // 2. DETECT WORKFLOW PATTERN + const hasPromptNodes = nodes.some( ( n ) => n.data.type === "Prompt" ); + const hasModelNodes = nodes.some( ( n ) => n.data.type === "Model" || n.data.type === "ModelSet" ); + const hasToolNodes = nodes.some( ( n ) => n.data.type === "Tool" || n.data.type === "ToolSet" ); + const hasRouterNodes = nodes.some( ( n ) => n.data.type === "Router" ); + const hasAgentNodes = nodes.some( ( n ) => n.data.type === "Agent" ); + + // 3. CHOOSE EXECUTION STRATEGY + if ( hasAgentNodes ) { + // Agent-driven workflow — delegate to strands agent or multi-agent runtime + return await executeAgentWorkflow( ctx, { nodes, edges, input, startTime } ); + } else if ( hasPromptNodes && hasModelNodes && !hasRouterNodes ) { + // Use message composer for Prompt + Model workflows + return await executePromptModelWorkflow( ctx, { nodes, edges, input, runtimeInputs, startTime } ); + } else if ( hasRouterNodes ) { + // Execute with conditional routing + return await executeRoutedWorkflow( ctx, { nodes, edges, input, startTime } ); + } else if ( hasToolNodes && !hasPromptNodes && !hasModelNodes ) { + // Tool-only workflow — execute tools in DAG order + return await executeDAGWorkflow( ctx, { nodes, edges, input, startTime } ); + } else { + // Generic DAG execution for all other patterns + return await executeDAGWorkflow( ctx, { nodes, edges, input, startTime } ); + } + }, +} ); + + + +/** + * Execute Prompt + Model workflows using message composer + */ +async function executePromptModelWorkflow( + ctx: any, + { + nodes, + edges, + input, + runtimeInputs, + startTime, + }: { + nodes: WorkflowNode[]; + edges: WorkflowEdge[]; + input: any; + runtimeInputs?: any; + startTime: number; + } +) { + try { + // Compose messages from prompt nodes + const composed = composeWorkflow( nodes, edges, { + runtimeInputs: { ...input, ...( runtimeInputs || {} ) }, + } ); + + // Gate: enforce tier-based Bedrock access before executing + let gateResult: { allowed: true; userId: string; tier: string } | undefined; + if ( composed.kind === "bedrock" ) { + const { requireBedrockAccess } = await import( "./lib/bedrockGate" ); + const gate = await requireBedrockAccess( + ctx, + composed.bedrock?.modelId, + async ( args ) => ctx.runQuery( internal.users.getInternal, args ), + ); + if ( !gate.allowed ) { + throw new Error( gate.reason ); + } + gateResult = gate; + } + + // Execute composed messages with actual API calls + const result = await executeComposedMessages( composed ); + + // Meter token usage for billing + if ( result.tokenUsage && gateResult ) { + await ctx.runMutation( internal.stripeMutations.incrementUsageAndReportOverage, { + userId: gateResult.userId as any, + modelId: composed.bedrock?.modelId, + inputTokens: result.tokenUsage.inputTokens, + outputTokens: result.tokenUsage.outputTokens, + } ); + } + + return { + success: true, + result: { + text: result.text, + composed, + metadata: composed.metadata, + }, + executionLog: [ + { + nodeType: "Prompt+Model", + executionTime: Date.now() - startTime, + result: { text: result.text }, + }, + ], + executionTime: Date.now() - startTime, + }; + } catch ( error: any ) { + return { + success: false, + error: error.message, + executionTime: Date.now() - startTime, + }; + } +} + +/** + * Execute workflows with Router nodes (conditional branching) + */ +async function executeRoutedWorkflow( + ctx: any, + { + nodes, + edges, + input, + startTime, + }: { + nodes: WorkflowNode[]; + edges: WorkflowEdge[]; + input: any; + startTime: number; + } +) { + const nodeResults = new Map(); + const executionLog: any[] = []; + + // Find entry point (node with no incoming edges) + const incomingEdges = new Set( edges.map( ( e ) => e.target ) ); + const entryNodes = nodes.filter( ( n ) => !incomingEdges.has( n.id ) ); + + if ( entryNodes.length === 0 ) { + throw new Error( "No entry point found in workflow" ); + } + + if ( entryNodes.length > 1 ) { + throw new Error( `Workflow has ${entryNodes.length} entry points (nodes with no incoming edges). Expected exactly one.` ); + } + + // Execute starting from entry point + let currentNodeId = entryNodes[0].id; + let currentInput = input; + let iterations = 0; + const maxIterations = 100; // Prevent infinite loops + + while ( currentNodeId && iterations < maxIterations ) { + iterations++; + const node = nodes.find( ( n ) => n.id === currentNodeId ); + if ( !node ) break; + + const nodeStartTime = Date.now(); + + // Execute node + const result = await executeNode( ctx, node, currentInput, nodeResults ); + + executionLog.push( { + nodeId: node.id, + nodeType: node.data.type, + nodeLabel: node.data.label || node.id, + executionTime: Date.now() - nodeStartTime, + result, + } ); + + nodeResults.set( node.id, result ); + currentInput = result; + + // Determine next node based on Router logic + if ( node.data.type === "Router" ) { + currentNodeId = await evaluateRouterConditions( node, result, edges ); + } else { + // Follow first outgoing edge + const outgoing = edges.filter( ( e ) => e.source === node.id ); + currentNodeId = outgoing.length > 0 ? outgoing[0].target : ""; + } + } + + return { + success: true, + result: currentInput, + executionLog, + executionTime: Date.now() - startTime, + iterations, + }; +} + +/** + * Execute workflows as DAG (topological order, supports parallelism) + */ +async function executeDAGWorkflow( + ctx: any, + { + nodes, + edges, + input, + startTime, + }: { + nodes: WorkflowNode[]; + edges: WorkflowEdge[]; + input: any; + startTime: number; + } +) { + const nodeResults = new Map(); + const executionLog: any[] = []; + const executed = new Set(); + + // Build dependency map + const dependencies = new Map(); + edges.forEach( ( edge ) => { + const deps = dependencies.get( edge.target ) || []; + deps.push( edge.source ); + dependencies.set( edge.target, deps ); + } ); + + const cycle = detectDependencyCycle( nodes, dependencies ); + if ( cycle ) { + throw new Error( + `Workflow contains a cycle: ${cycle.join( " -> " )}. DAG execution requires an acyclic graph.` + ); + } + + // In-flight promises prevent duplicate concurrent executions of the same node + const inFlight = new Map>(); + + // Recursive execution with deduplication + async function executeNodeRecursive( nodeId: string ): Promise { + if ( executed.has( nodeId ) ) { + return nodeResults.get( nodeId ); + } + + // Return existing in-flight promise to prevent race conditions + if ( inFlight.has( nodeId ) ) { + return inFlight.get( nodeId ); + } + + const promise = ( async () => { + const node = nodes.find( ( n ) => n.id === nodeId ); + if ( !node ) throw new Error( `Node ${nodeId} not found` ); + + // Execute dependencies first (parallel if independent) + const deps = dependencies.get( nodeId ) || []; + const depResults = await Promise.all( + deps.map( ( depId ) => executeNodeRecursive( depId ) ) + ); + + // Merge dependency outputs + const nodeInput = depResults.length === 1 ? depResults[0] : depResults.length > 1 ? depResults : input; + + // Execute this node + const nodeStartTime = Date.now(); + const result = await executeNode( ctx, node, nodeInput, nodeResults ); + + executionLog.push( { + nodeId: node.id, + nodeType: node.data.type, + nodeLabel: node.data.label || node.id, + executionTime: Date.now() - nodeStartTime, + result, + } ); + + executed.add( nodeId ); + nodeResults.set( nodeId, result ); + + return result; + } )(); + + inFlight.set( nodeId, promise ); + return promise; + } + + // Find output nodes (nodes with no outgoing edges) + const outgoingEdges = new Set( edges.map( ( e ) => e.source ) ); + const outputNodes = nodes.filter( ( n ) => !outgoingEdges.has( n.id ) ); + + if ( outputNodes.length === 0 ) { + // No explicit output, execute all nodes + await Promise.all( nodes.map( ( n ) => executeNodeRecursive( n.id ) ) ); + // Deterministic: pick the last node in the original array that was executed + let lastExecutedNodeId = nodes[nodes.length - 1].id; + for ( let i = nodes.length - 1; i >= 0; i-- ) { + if ( nodeResults.has( nodes[i].id ) ) { + lastExecutedNodeId = nodes[i].id; + break; + } + } + return { + success: true, + result: nodeResults.get( lastExecutedNodeId ), + executionLog, + executionTime: Date.now() - startTime, + }; + } + + // Execute all output nodes + const results = await Promise.all( + outputNodes.map( ( n ) => executeNodeRecursive( n.id ) ) + ); + + return { + success: true, + result: results.length === 1 ? results[0] : results, + executionLog, + executionTime: Date.now() - startTime, + }; +} + +/** + * Execute Agent-driven workflows + * - "direct" mode: single agent execution via strandsAgentExecution + * - "swarm"/"graph"/"workflow" mode: multi-agent execution via multiAgentRuntime + */ +async function executeAgentWorkflow( + ctx: any, + { + nodes, + edges, + input, + startTime, + }: { + nodes: WorkflowNode[]; + edges: WorkflowEdge[]; + input: any; + startTime: number; + } +): Promise<{ success: boolean; result?: any; error?: string; executionLog?: any[]; executionTime: number }> { + const executionLog: any[] = []; + const agentNodes = nodes.filter( ( n ) => n.data.type === "Agent" ); + + if ( agentNodes.length === 0 ) { + throw new Error( "No Agent nodes found in workflow" ); + } + + // Use the first Agent node as the primary agent + const primaryAgent = agentNodes[0]; + const agentConfig = primaryAgent.data.config as any; + const executionMode = agentConfig.executionMode || "direct"; + const message = typeof input === "string" ? input : ( input?.message ?? JSON.stringify( input ) ); + + if ( executionMode === "direct" ) { + // Single agent execution + if ( !agentConfig.agentId ) { + return { + success: false, + error: "Agent node requires an agentId for direct execution. Select an agent in the node settings.", + executionTime: Date.now() - startTime, + }; + } + + try { + const result: any = await ctx.runAction( api.strandsAgentExecution.executeAgentWithStrandsAgents, { + agentId: agentConfig.agentId, + message, + } ); + + executionLog.push( { + nodeId: primaryAgent.id, + nodeType: "Agent", + nodeLabel: primaryAgent.data.label || agentConfig.name || "Agent", + executionTime: Date.now() - startTime, + result, + } ); + + return { + success: result.success, + result: result, + executionLog, + executionTime: Date.now() - startTime, + }; + } catch ( error: any ) { + return { + success: false, + error: error.message, + executionLog, + executionTime: Date.now() - startTime, + }; + } + } + + // Multi-agent mode (swarm/graph/workflow) + // Gather SubAgent nodes connected to the primary Agent + const subAgentNodes = nodes.filter( ( n ) => n.data.type === "SubAgent" ); + const connectedSubAgents = subAgentNodes.filter( ( sub ) => + edges.some( ( e ) => e.target === primaryAgent.id && e.source === sub.id ) + ); + + if ( !agentConfig.agentId ) { + return { + success: false, + error: "Agent node requires an agentId for multi-agent execution.", + executionTime: Date.now() - startTime, + }; + } + + const agentsList = connectedSubAgents + .map( ( sub ) => { + const subConfig = sub.data.config as any; + if ( !subConfig.agentId ) return null; + return { + agentId: subConfig.agentId, + role: subConfig.role || sub.data.label || "worker", + }; + } ) + .filter( Boolean ); + + if ( agentsList.length === 0 ) { + return { + success: false, + error: `Agent is in "${executionMode}" mode but has no connected SubAgent nodes with valid agentIds.`, + executionTime: Date.now() - startTime, + }; + } + + try { + const pattern = executionMode === "swarm" ? "swarm" : executionMode === "graph" ? "graph" : "workflow"; + const result: any = await ctx.runAction( api.multiAgentRuntime.executeMultiAgentPattern, { + parentAgentId: agentConfig.agentId, + pattern, + agents: agentsList, + executionMode: pattern === "swarm" ? "parallel" : "sequential", + sharedContext: { input: message }, + } ); + + executionLog.push( { + nodeId: primaryAgent.id, + nodeType: "Agent", + nodeLabel: primaryAgent.data.label || agentConfig.name || "Agent", + mode: executionMode, + subAgents: agentsList.length, + executionTime: Date.now() - startTime, + result, + } ); + + return { + success: true, + result, + executionLog, + executionTime: Date.now() - startTime, + }; + } catch ( error: any ) { + return { + success: false, + error: error.message, + executionLog, + executionTime: Date.now() - startTime, + }; + } +} + +/** + * Execute individual node based on type - REAL IMPLEMENTATION + */ +async function executeNode( + ctx: any, + node: WorkflowNode, + input: any, + nodeResults: Map +): Promise { + const nodeType = node.data.type; + const config = node.data.config; + + switch ( nodeType ) { + case "Prompt": + case "PromptText": + case "Background": + case "Context": + case "OutputIndicator": + // These are composed by message composer, not executed directly + return { nodeType, config, passthrough: true }; + + case "Model": { + // Model nodes store config but don't execute alone + const modelConfig = config as any; + return { modelId: modelConfig.modelId || modelConfig.model, provider: modelConfig.provider }; + } + + case "ModelSet": { + // ModelSet resolved by message composer + const modelSetConfig = config as any; + return { strategy: modelSetConfig.strategy }; + } + + case "Tool": { + // REAL TOOL EXECUTION + const toolConfig = config as any; + + if ( toolConfig.kind === "mcp" ) { + // Execute MCP tool + try { + const result = await ctx.runAction( internal.mcpClient.invokeMCPToolInternal, { + serverName: toolConfig.server || "default", + toolName: toolConfig.tool, + parameters: toolConfig.params || input, + } ); + return result; + } catch ( error: any ) { + return { success: false, error: error.message, toolType: "mcp" }; + } + } else if ( toolConfig.kind === "internal" ) { + // Execute internal @tool + const toolName = toolConfig.name; + const toolMap: Record = { + handoff_to_user: api.tools.handoffToUser, + short_term_memory: api.tools.shortTermMemory, + long_term_memory: api.tools.longTermMemory, + semantic_memory: api.tools.semanticMemory, + self_consistency: api.tools.selfConsistency, + tree_of_thoughts: api.tools.treeOfThoughts, + reflexion: api.tools.reflexion, + map_reduce: api.tools.mapReduce, + parallel_prompts: api.tools.parallelPrompts, + }; + + const toolAction = toolMap[toolName] || toolMap[toolName.toLowerCase().replace( / /g, "_" )]; + + if ( toolAction ) { + try { + return await ctx.runAction( toolAction, toolConfig.args || input ); + } catch ( error: any ) { + return { success: false, error: error.message, toolType: "internal", toolName }; + } + } else { + return await ctx.runAction( api.tools.executeStrandsTool, { + toolName, + params: toolConfig.args || input, + context: { nodeId: node.id }, + } ); + } + } else if ( toolConfig.kind === "openapi" ) { + // OpenAPI execution (TODO: implement swagger client) + return { + success: false, + error: "OpenAPI tool execution not yet implemented", + toolType: "openapi", + specUri: toolConfig.specUri, + operation: toolConfig.opId, + }; + } + + return { success: false, error: "Unknown tool kind" }; + } + + case "ToolSet": { + // Execute multiple tools based on call policy + const toolSetConfig = config as any; + const allowedTools = toolSetConfig.allowList || []; + const callPolicy = toolSetConfig.callPolicy || "model-first"; + const maxParallel = toolSetConfig.maxParallel || 3; + + // Find connected Tool nodes + const toolNodes = allowedTools + .map( ( toolId: string ) => { + // Look in nodeResults for tool outputs + return nodeResults.get( toolId ); + } ) + .filter( Boolean ); + + if ( callPolicy === "tool-first" ) { + // Execute tools first, then model + return { toolResults: toolNodes, callPolicy }; + } else { + // Model-first or interleave + return { toolsAvailable: toolNodes.length, callPolicy }; + } + } + + case "Memory": { + // REAL MEMORY EXECUTION + const memoryConfig = config as any; + + if ( memoryConfig.source === "convex" ) { + // TODO: Query Convex database + return { source: "convex", index: memoryConfig.index, topK: memoryConfig.topK }; + } else if ( memoryConfig.source === "s3" ) { + // TODO: Query S3 + return { source: "s3", index: memoryConfig.index }; + } else if ( memoryConfig.source === "vector_db" ) { + // TODO: Query vector database + return { source: "vector_db", index: memoryConfig.index, topK: memoryConfig.topK }; + } + + return { source: memoryConfig.source, notImplemented: true }; + } + + case "Router": + // Router logic handled in executeRoutedWorkflow + return { routerNode: true, config }; + + case "Entrypoint": { + // Entrypoint defines runtime, doesn't execute + const entrypointConfig = config as any; + return { runtime: entrypointConfig.runtime, path: entrypointConfig.path }; + } + + case "Agent": { + // Agent execution — delegated to strands agent or multi-agent runtime + // Direct execution happens in executeAgentWorkflow; here we return config for DAG + const agentConfig = config as any; + return { + nodeType: "Agent", + agentId: agentConfig.agentId, + executionMode: agentConfig.executionMode || "direct", + name: agentConfig.name, + }; + } + + case "SubAgent": { + // SubAgent provides config to parent Agent node — not executed independently + const subAgentConfig = config as any; + return { + nodeType: "SubAgent", + agentId: subAgentConfig.agentId, + role: subAgentConfig.role, + communicationProtocol: subAgentConfig.communicationProtocol || "hierarchical", + }; + } + + default: + // Unknown node type, pass through + return { nodeType, input }; + } +} + +/** + * Evaluate Router conditions to determine next node + */ +async function evaluateRouterConditions( + node: WorkflowNode, + result: any, + edges: WorkflowEdge[] +): Promise { + const config = node.data.config as any; + const conditions = config.conditions || []; + + for ( const condition of conditions ) { + // Simple expression evaluation (unsafe, TODO: use safe evaluator) + try { + const context = { result, success: result?.success }; + const evalResult = evaluateExpression( condition.expression, context ); + + if ( evalResult ) { + return condition.thenNode; + } else if ( condition.type === "if" && condition.elseNode ) { + return condition.elseNode; + } + } catch ( error ) { + console.error( "Router condition evaluation failed:", error ); + } + } + + // Default: follow first outgoing edge + const outgoing = edges.filter( ( e ) => e.source === node.id ); + return outgoing.length > 0 ? outgoing[0].target : ""; +} + +/** + * Safe expression evaluator (simplified) + */ +function evaluateExpression( expression: string, context: Record ): boolean { + // Simple checks for now (TODO: use proper safe evaluator) + if ( expression.includes( "success" ) ) { + return context.success === true; + } + if ( expression.includes( "error" ) ) { + return context.result?.error !== undefined; + } + if ( expression.includes( "==" ) || expression.includes( "===" ) ) { + // Very basic equality check + const [left, right] = expression.split( /===?/ ).map( ( s ) => s.trim() ); + const leftValue = context[left] || context.result?.[left]; + const rightValue = right === "null" ? null : right === "true" ? true : right === "false" ? false : right; + return leftValue === rightValue; + } + + // Default false for safety + return false; +} diff --git a/convex/workflowTemplateSeeder.ts b/convex/workflowTemplateSeeder.ts new file mode 100644 index 0000000..149d1f0 --- /dev/null +++ b/convex/workflowTemplateSeeder.ts @@ -0,0 +1,515 @@ +/** + * Workflow Template Seeder + * + * Pre-built workflow templates that users can load into the visual scripting builder. + * All node types MUST be in the ALLOWED_NODE_TYPES set defined in workflows.ts. + * All node data MUST follow the { type, label, notes, config } structure. + */ + +import { internalMutation } from "./_generated/server"; + +export const seedWorkflowTemplates = internalMutation({ + handler: async (ctx) => { + // Check if already seeded + const existing = await ctx.db.query("workflowTemplates").first(); + if (existing) { + console.log("Workflow templates already seeded"); + return; + } + + const templates = [ + /** + * 1. CUSTOMER SUPPORT CHATBOT + */ + { + name: "Customer Support Chatbot", + description: "Full-featured customer support bot with knowledge base and escalation", + category: "Support", + icon: "💬", + difficulty: "Intermediate", + nodes: [ + { + id: "prompt-1", + type: "workflow", + position: { x: 200, y: 100 }, + data: { + type: "Prompt", + label: "Support Assistant", + notes: "Helpful, patient, and professional", + config: {}, + }, + }, + { + id: "memory-1", + type: "workflow", + position: { x: 200, y: 250 }, + data: { + type: "Memory", + label: "Conversation Memory", + notes: "Hybrid memory with similarity retrieval", + config: { + source: "convex", + topK: 10, + }, + }, + }, + { + id: "tool-rag-1", + type: "workflow", + position: { x: 400, y: 100 }, + data: { + type: "Tool", + label: "Knowledge Base Search", + notes: "RAG retrieval from customer support knowledge base", + config: { + kind: "internal", + name: "knowledge_base_search", + }, + }, + }, + { + id: "model-1", + type: "workflow", + position: { x: 400, y: 250 }, + data: { + type: "Model", + label: "Escalation Handler", + notes: "Senior support specialist for complex escalations", + config: { + provider: "bedrock", + temperature: 0.3, + maxTokens: 4096, + }, + }, + }, + ], + connections: [ + { + id: "e-prompt-memory", + source: "prompt-1", + target: "memory-1", + }, + { + id: "e-prompt-rag", + source: "prompt-1", + target: "tool-rag-1", + }, + { + id: "e-rag-prompt", + source: "tool-rag-1", + target: "prompt-1", + }, + { + id: "e-prompt-model", + source: "prompt-1", + target: "model-1", + }, + ], + isOfficial: true, + usageCount: 0, + createdAt: Date.now(), + }, + + /** + * 2. TECHNICAL ASSISTANT + */ + { + name: "Technical Assistant", + description: "Code analysis and debugging assistant with documentation access", + category: "Development", + icon: "🔧", + difficulty: "Advanced", + nodes: [ + { + id: "prompt-1", + type: "workflow", + position: { x: 200, y: 100 }, + data: { + type: "Prompt", + label: "Tech Assistant", + notes: "Technical, precise, and helpful. Code review, debugging, API docs.", + config: {}, + }, + }, + { + id: "tool-data-1", + type: "workflow", + position: { x: 400, y: 50 }, + data: { + type: "Tool", + label: "GitHub Data Connector", + notes: "Fetches issues, PRs, and code from GitHub API", + config: { + kind: "internal", + name: "github_integration", + }, + }, + }, + { + id: "model-1", + type: "workflow", + position: { x: 400, y: 150 }, + data: { + type: "Model", + label: "Code Analyzer", + notes: "Specializes in code review, security analysis, performance", + config: { + provider: "bedrock", + temperature: 0.2, + maxTokens: 4096, + }, + }, + }, + { + id: "tool-rag-1", + type: "workflow", + position: { x: 400, y: 250 }, + data: { + type: "Tool", + label: "Technical Docs Search", + notes: "Semantic search over technical documentation", + config: { + kind: "internal", + name: "documentation_search", + }, + }, + }, + ], + connections: [ + { + id: "e-prompt-data", + source: "prompt-1", + target: "tool-data-1", + }, + { + id: "e-data-model", + source: "tool-data-1", + target: "model-1", + }, + { + id: "e-prompt-rag", + source: "prompt-1", + target: "tool-rag-1", + }, + { + id: "e-rag-prompt", + source: "tool-rag-1", + target: "prompt-1", + }, + ], + isOfficial: true, + usageCount: 0, + createdAt: Date.now(), + }, + + /** + * 3. RESEARCH ASSISTANT + */ + { + name: "Research Assistant", + description: "Web research and data analysis with structured output", + category: "Research", + icon: "🔍", + difficulty: "Intermediate", + nodes: [ + { + id: "prompt-1", + type: "workflow", + position: { x: 200, y: 150 }, + data: { + type: "Prompt", + label: "Research Assistant", + notes: "Analytical, thorough, and objective. Web research, data analysis, reports.", + config: {}, + }, + }, + { + id: "prompt-cot-1", + type: "workflow", + position: { x: 400, y: 50 }, + data: { + type: "Prompt", + label: "Chain of Thought Reasoning", + notes: "Step-by-step reasoning with evidence requirements", + config: {}, + }, + }, + { + id: "tool-rag-1", + type: "workflow", + position: { x: 400, y: 150 }, + data: { + type: "Tool", + label: "Research Papers Search", + notes: "Hybrid retrieval from research paper knowledge base", + config: { + kind: "internal", + name: "research_search", + }, + }, + }, + { + id: "tool-data-1", + type: "workflow", + position: { x: 400, y: 250 }, + data: { + type: "Tool", + label: "Academic Data Connector", + notes: "Searches academic APIs with deduplication and relevance sorting", + config: { + kind: "internal", + name: "academic_search", + }, + }, + }, + ], + connections: [ + { + id: "e-prompt-cot", + source: "prompt-1", + target: "prompt-cot-1", + }, + { + id: "e-cot-rag", + source: "prompt-cot-1", + target: "tool-rag-1", + }, + { + id: "e-rag-data", + source: "tool-rag-1", + target: "tool-data-1", + }, + { + id: "e-data-prompt", + source: "tool-data-1", + target: "prompt-1", + }, + ], + isOfficial: true, + usageCount: 0, + createdAt: Date.now(), + }, + + /** + * 4. CHAIN OF THOUGHT DEBUGGER + */ + { + name: "Chain of Thought Debugger", + description: "Step-by-step reasoning for complex problem solving", + category: "Reasoning", + icon: "🧠", + difficulty: "Advanced", + nodes: [ + { + id: "prompt-cot-1", + type: "workflow", + position: { x: 200, y: 100 }, + data: { + type: "Prompt", + label: "Chain of Thought Analyzer", + notes: "Enables deep thinking with evidence requirements", + config: {}, + }, + }, + { + id: "prompt-thought-1", + type: "workflow", + position: { x: 400, y: 100 }, + data: { + type: "Prompt", + label: "Systematic Debugger", + notes: "Steps: Problem Analysis, Hypothesis Generation, Evidence Gathering, Solution Formulation", + config: {}, + }, + }, + { + id: "model-1", + type: "workflow", + position: { x: 600, y: 100 }, + data: { + type: "Model", + label: "Reasoning Model", + notes: "Low temperature for precise reasoning", + config: { + provider: "bedrock", + temperature: 0.3, + maxTokens: 4096, + }, + }, + }, + ], + connections: [ + { + id: "e-cot-thought", + source: "prompt-cot-1", + target: "prompt-thought-1", + }, + { + id: "e-thought-model", + source: "prompt-thought-1", + target: "model-1", + }, + ], + isOfficial: true, + usageCount: 0, + createdAt: Date.now(), + }, + + /** + * 5. RAG KNOWLEDGE BOT + */ + { + name: "RAG Knowledge Bot", + description: "Knowledge base chatbot with retrieval augmented generation", + category: "Knowledge", + icon: "📚", + difficulty: "Beginner", + nodes: [ + { + id: "prompt-1", + type: "workflow", + position: { x: 200, y: 150 }, + data: { + type: "Prompt", + label: "Knowledge Bot", + notes: "Knowledgeable and precise. Answers from documentation.", + config: {}, + }, + }, + { + id: "tool-rag-1", + type: "workflow", + position: { x: 400, y: 150 }, + data: { + type: "Tool", + label: "Knowledge Base Search", + notes: "Hybrid retrieval with reranking and source citation", + config: { + kind: "internal", + name: "knowledge_search", + }, + }, + }, + { + id: "memory-1", + type: "workflow", + position: { x: 400, y: 250 }, + data: { + type: "Memory", + label: "Conversation Memory", + notes: "Short-term recency-based memory", + config: { + source: "convex", + topK: 5, + }, + }, + }, + ], + connections: [ + { + id: "e-prompt-rag", + source: "prompt-1", + target: "tool-rag-1", + }, + { + id: "e-rag-prompt", + source: "tool-rag-1", + target: "prompt-1", + }, + { + id: "e-prompt-memory", + source: "prompt-1", + target: "memory-1", + }, + ], + isOfficial: true, + usageCount: 0, + createdAt: Date.now(), + }, + + /** + * 6. REACT AUTONOMOUS AGENT + */ + { + name: "ReAct Autonomous Agent", + description: "Self-directed agent that reasons and acts iteratively", + category: "Advanced", + icon: "🔄", + difficulty: "Expert", + nodes: [ + { + id: "router-1", + type: "workflow", + position: { x: 300, y: 100 }, + data: { + type: "Router", + label: "ReAct Decision Loop", + notes: "Iterates up to 10 times, requires confidence 0.9", + config: { + conditions: [ + { type: "if", expression: "success", thenNode: "model-1", elseNode: "tool-data-1" }, + ], + }, + }, + }, + { + id: "model-1", + type: "workflow", + position: { x: 500, y: 50 }, + data: { + type: "Model", + label: "Reasoning Model", + notes: "Creative temperature for exploration", + config: { + provider: "bedrock", + temperature: 0.7, + maxTokens: 8192, + }, + }, + }, + { + id: "tool-data-1", + type: "workflow", + position: { x: 500, y: 150 }, + data: { + type: "Tool", + label: "Action Executor", + notes: "Executes actions with validation and sanitization", + config: { + kind: "internal", + name: "action_executor", + }, + }, + }, + ], + connections: [ + { + id: "e-router-model", + source: "router-1", + target: "model-1", + }, + { + id: "e-model-data", + source: "model-1", + target: "tool-data-1", + }, + { + id: "e-data-router", + source: "tool-data-1", + target: "router-1", + }, + ], + isOfficial: true, + usageCount: 0, + createdAt: Date.now(), + }, + ]; + + // Insert all templates + for (const template of templates) { + await ctx.db.insert("workflowTemplates", template); + } + + console.log(`Seeded ${templates.length} workflow templates`); + }, +}); diff --git a/convex/workflowTemplates.ts b/convex/workflowTemplates.ts new file mode 100644 index 0000000..ce143db --- /dev/null +++ b/convex/workflowTemplates.ts @@ -0,0 +1,326 @@ +import { v } from "convex/values"; +import { mutation, query } from "./_generated/server"; +import { getAuthUserId } from "@convex-dev/auth/server"; + +// Workflow template definitions +export const workflowTemplates = { + chainOfThought: { + id: "chain-of-thought", + name: "Chain of Thought", + description: "Break down complex reasoning into sequential steps", + nodes: [ + { id: "input", type: "input", label: "Problem Input" }, + { id: "step1", type: "reasoning", label: "Step 1: Understand" }, + { id: "step2", type: "reasoning", label: "Step 2: Break Down" }, + { id: "step3", type: "reasoning", label: "Step 3: Solve" }, + { id: "output", type: "output", label: "Final Answer" } + ], + edges: [ + { from: "input", to: "step1" }, + { from: "step1", to: "step2" }, + { from: "step2", to: "step3" }, + { from: "step3", to: "output" } + ] + }, + + promptChaining: { + id: "prompt-chaining", + name: "Prompt Chaining", + description: "Chain multiple prompts where each output feeds the next", + nodes: [ + { id: "input", type: "input", label: "Initial Input" }, + { id: "prompt1", type: "llm", label: "Extract Key Info" }, + { id: "prompt2", type: "llm", label: "Analyze Context" }, + { id: "prompt3", type: "llm", label: "Generate Response" }, + { id: "output", type: "output", label: "Final Output" } + ], + edges: [ + { from: "input", to: "prompt1" }, + { from: "prompt1", to: "prompt2" }, + { from: "prompt2", to: "prompt3" }, + { from: "prompt3", to: "output" } + ] + }, + + parallelPrompts: { + id: "parallel-prompts", + name: "Parallel Prompts (Async)", + description: "Execute multiple prompts in parallel and aggregate results", + nodes: [ + { id: "input", type: "input", label: "Input" }, + { id: "split", type: "split", label: "Split Task" }, + { id: "prompt1", type: "llm", label: "Perspective 1" }, + { id: "prompt2", type: "llm", label: "Perspective 2" }, + { id: "prompt3", type: "llm", label: "Perspective 3" }, + { id: "aggregate", type: "aggregate", label: "Combine Results" }, + { id: "output", type: "output", label: "Synthesized Output" } + ], + edges: [ + { from: "input", to: "split" }, + { from: "split", to: "prompt1" }, + { from: "split", to: "prompt2" }, + { from: "split", to: "prompt3" }, + { from: "prompt1", to: "aggregate" }, + { from: "prompt2", to: "aggregate" }, + { from: "prompt3", to: "aggregate" }, + { from: "aggregate", to: "output" } + ] + }, + + rag: { + id: "rag", + name: "RAG (Retrieval Augmented Generation)", + description: "Retrieve relevant context before generating response", + nodes: [ + { id: "input", type: "input", label: "Query" }, + { id: "embed", type: "embedding", label: "Generate Embedding" }, + { id: "retrieve", type: "retrieval", label: "Search Knowledge Base" }, + { id: "rerank", type: "rerank", label: "Rerank Results" }, + { id: "generate", type: "llm", label: "Generate with Context" }, + { id: "output", type: "output", label: "Response" } + ], + edges: [ + { from: "input", to: "embed" }, + { from: "embed", to: "retrieve" }, + { from: "retrieve", to: "rerank" }, + { from: "rerank", to: "generate" }, + { from: "input", to: "generate" }, + { from: "generate", to: "output" } + ] + }, + + react: { + id: "react", + name: "ReAct (Reasoning + Acting)", + description: "Iterative reasoning and action execution loop", + nodes: [ + { id: "input", type: "input", label: "Task" }, + { id: "thought", type: "reasoning", label: "Thought" }, + { id: "action", type: "tool", label: "Action" }, + { id: "observation", type: "observation", label: "Observation" }, + { id: "decision", type: "decision", label: "Continue?" }, + { id: "output", type: "output", label: "Final Answer" } + ], + edges: [ + { from: "input", to: "thought" }, + { from: "thought", to: "action" }, + { from: "action", to: "observation" }, + { from: "observation", to: "decision" }, + { from: "decision", to: "thought", condition: "continue" }, + { from: "decision", to: "output", condition: "done" } + ] + }, + + selfConsistency: { + id: "self-consistency", + name: "Self-Consistency", + description: "Generate multiple reasoning paths and vote on best answer", + nodes: [ + { id: "input", type: "input", label: "Problem" }, + { id: "split", type: "split", label: "Generate Paths" }, + { id: "path1", type: "reasoning", label: "Reasoning Path 1" }, + { id: "path2", type: "reasoning", label: "Reasoning Path 2" }, + { id: "path3", type: "reasoning", label: "Reasoning Path 3" }, + { id: "vote", type: "vote", label: "Majority Vote" }, + { id: "output", type: "output", label: "Best Answer" } + ], + edges: [ + { from: "input", to: "split" }, + { from: "split", to: "path1" }, + { from: "split", to: "path2" }, + { from: "split", to: "path3" }, + { from: "path1", to: "vote" }, + { from: "path2", to: "vote" }, + { from: "path3", to: "vote" }, + { from: "vote", to: "output" } + ] + }, + + treeOfThoughts: { + id: "tree-of-thoughts", + name: "Tree of Thoughts", + description: "Explore multiple reasoning branches and select best path", + nodes: [ + { id: "input", type: "input", label: "Problem" }, + { id: "branch1", type: "reasoning", label: "Approach 1" }, + { id: "branch2", type: "reasoning", label: "Approach 2" }, + { id: "eval1", type: "evaluation", label: "Evaluate 1" }, + { id: "eval2", type: "evaluation", label: "Evaluate 2" }, + { id: "select", type: "selection", label: "Select Best" }, + { id: "expand", type: "reasoning", label: "Expand Solution" }, + { id: "output", type: "output", label: "Final Solution" } + ], + edges: [ + { from: "input", to: "branch1" }, + { from: "input", to: "branch2" }, + { from: "branch1", to: "eval1" }, + { from: "branch2", to: "eval2" }, + { from: "eval1", to: "select" }, + { from: "eval2", to: "select" }, + { from: "select", to: "expand" }, + { from: "expand", to: "output" } + ] + }, + + reflexion: { + id: "reflexion", + name: "Reflexion", + description: "Self-reflection and iterative improvement loop", + nodes: [ + { id: "input", type: "input", label: "Task" }, + { id: "attempt", type: "llm", label: "Generate Solution" }, + { id: "evaluate", type: "evaluation", label: "Self-Evaluate" }, + { id: "reflect", type: "reflection", label: "Reflect on Errors" }, + { id: "decision", type: "decision", label: "Good Enough?" }, + { id: "output", type: "output", label: "Final Solution" } + ], + edges: [ + { from: "input", to: "attempt" }, + { from: "attempt", to: "evaluate" }, + { from: "evaluate", to: "decision" }, + { from: "decision", to: "reflect", condition: "improve" }, + { from: "reflect", to: "attempt" }, + { from: "decision", to: "output", condition: "done" } + ] + }, + + mapReduce: { + id: "map-reduce", + name: "Map-Reduce", + description: "Process data in parallel chunks then combine results", + nodes: [ + { id: "input", type: "input", label: "Large Dataset" }, + { id: "split", type: "split", label: "Split into Chunks" }, + { id: "map1", type: "llm", label: "Process Chunk 1" }, + { id: "map2", type: "llm", label: "Process Chunk 2" }, + { id: "map3", type: "llm", label: "Process Chunk 3" }, + { id: "reduce", type: "aggregate", label: "Combine Results" }, + { id: "output", type: "output", label: "Final Summary" } + ], + edges: [ + { from: "input", to: "split" }, + { from: "split", to: "map1" }, + { from: "split", to: "map2" }, + { from: "split", to: "map3" }, + { from: "map1", to: "reduce" }, + { from: "map2", to: "reduce" }, + { from: "map3", to: "reduce" }, + { from: "reduce", to: "output" } + ] + }, + + humanInTheLoop: { + id: "human-in-the-loop", + name: "Human-in-the-Loop", + description: "Request human feedback at critical decision points", + nodes: [ + { id: "input", type: "input", label: "Task" }, + { id: "draft", type: "llm", label: "Generate Draft" }, + { id: "review", type: "human", label: "Human Review" }, + { id: "decision", type: "decision", label: "Approved?" }, + { id: "revise", type: "llm", label: "Revise Based on Feedback" }, + { id: "output", type: "output", label: "Final Output" } + ], + edges: [ + { from: "input", to: "draft" }, + { from: "draft", to: "review" }, + { from: "review", to: "decision" }, + { from: "decision", to: "output", condition: "approved" }, + { from: "decision", to: "revise", condition: "rejected" }, + { from: "revise", to: "review" } + ] + } +}; + +export const getWorkflowTemplates = query( { + handler: async () => { + return Object.values( workflowTemplates ); + } +} ); + +export const getWorkflowTemplate = query( { + args: { templateId: v.string() }, + handler: async ( _, { templateId } ) => { + return workflowTemplates[templateId as keyof typeof workflowTemplates] || null; + } +} ); + +/** + * Maps legacy template node types to valid NodeKind values for the DB schema. + */ +function mapNodeType( type: string ): string { + const typeMap: Record = { + input: "Prompt", + output: "OutputIndicator", + reasoning: "Prompt", + llm: "Model", + split: "Router", + aggregate: "Tool", + embedding: "Tool", + retrieval: "Tool", + rerank: "Tool", + tool: "Tool", + observation: "Context", + decision: "Router", + vote: "Tool", + evaluation: "Tool", + selection: "Router", + reflection: "Prompt", + human: "Tool", + }; + return typeMap[type] || "Prompt"; +} + +export const createWorkflowFromTemplate = mutation( { + args: { + templateId: v.string(), + name: v.string(), + userId: v.string() + }, + handler: async ( ctx, { templateId, name } ) => { + const identity = await ctx.auth.getUserIdentity(); + if ( !identity ) { + throw new Error( "Unauthorized" ); + } + const userId = await getAuthUserId( ctx ); + if ( !userId ) throw new Error( "Unauthorized" ); + + // use userId as the owner every time + + const template = workflowTemplates[templateId as keyof typeof workflowTemplates]; + if ( !template ) throw new Error( "Template not found" ); + + // Convert template nodes to workflow schema format + const nodes = template.nodes.map( ( n: { id: string; type: string; label: string }, i: number ) => ( { + id: n.id, + type: "workflow", + position: { x: 200 + ( i % 3 ) * 200, y: 100 + Math.floor( i / 3 ) * 150 }, + data: { + type: mapNodeType( n.type ), + label: n.label, + notes: "", + config: {}, + }, + } ) ); + + // Convert template edges to workflow schema format + const edges = template.edges.map( ( e: { from: string; to: string }, i: number ) => ( { + id: `e-${i}`, + source: e.from, + target: e.to, + } ) ); + + const workflowId = await ctx.db.insert( "workflows", { + name, + userId, + templateId: template.id, + nodes, + edges, + status: "draft", + createdAt: Date.now(), + updatedAt: Date.now() + } ); + + return { workflowId, workflow: template }; + } +} ); diff --git a/convex/workflows.ts b/convex/workflows.ts new file mode 100644 index 0000000..330ed4a --- /dev/null +++ b/convex/workflows.ts @@ -0,0 +1,535 @@ +import { action, mutation, query, internalQuery, internalMutation } from "./_generated/server"; +import type { Id } from "./_generated/dataModel"; +import { v } from "convex/values"; +import { api, internal } from "./_generated/api"; +import type { WorkflowNode } from "../src/types/workflowNodes"; +import { findToolMetadata, normalizeToolName } from "./lib/strandsTools"; + +async function getUserScope( ctx: any ): Promise { + const identity = await ctx.auth.getUserIdentity(); + if ( !identity ) { + throw new Error( "Authentication required to manage workflows." ); + } + + const scope = + identity.subject || + identity.tokenIdentifier || + identity.email || + identity.provider; + + if ( !scope ) { + throw new Error( "Unable to resolve user identity." ); + } + + return scope; +} + +const ALLOWED_NODE_TYPES = new Set( [ + "Prompt", + "PromptText", // Deprecated – kept for migration of saved workflows + "Background", + "Context", + "OutputIndicator", + "Model", + "ModelSet", + "Tool", + "ToolSet", + "Router", + "Memory", + "Entrypoint", + "Agent", + "SubAgent", + "Decision", + "Aggregate", + "Human", + "Embedding", + "Retrieval", + "Rerank", + "AwsService", + "Database", + "Storage", + "Compute", + "Networking", + "Security", + "Monitoring", + "AI-ML", +] ); + +function sanitizeString( value: unknown, maxLength: number ) { + if ( typeof value !== "string" ) { + return ""; + } + return value.slice( 0, maxLength ); +} + +function deepSanitize( value: any, maxStringLength = 4000, depth = 0 ): any { + if ( depth > 4 ) { + return null; + } + + if ( typeof value === "string" ) { + return sanitizeString( value, maxStringLength ); + } + + if ( Array.isArray( value ) ) { + return value.slice( 0, 50 ).map( ( entry ) => deepSanitize( entry, maxStringLength, depth + 1 ) ); + } + + if ( typeof value === "object" && value !== null ) { + const sanitized: Record = {}; + for ( const [key, val] of Object.entries( value ).slice( 0, 50 ) ) { + sanitized[key] = deepSanitize( val, maxStringLength, depth + 1 ); + } + return sanitized; + } + + if ( typeof value === "number" || typeof value === "boolean" ) { + return value; + } + + return null; +} + +function sanitizeNode( node: any ) { + if ( typeof node !== "object" || node === null ) { + throw new Error( "Invalid node payload." ); + } + + const id = sanitizeString( node.id, 128 ); + const data = node.data ?? {}; + const nodeKind = sanitizeString( data.type, 64 ); + + if ( !id ) { + throw new Error( "Workflow nodes must include an id." ); + } + + if ( !nodeKind || !ALLOWED_NODE_TYPES.has( nodeKind ) ) { + throw new Error( `Unsupported node type: ${nodeKind || "unknown"}.` ); + } + + const position = + typeof node.position === "object" && node.position !== null + ? { + x: Number( node.position.x ) || 0, + y: Number( node.position.y ) || 0, + } + : undefined; + + const label = sanitizeString( data.label, 256 ); + const notes = sanitizeString( data.notes, 4000 ); + + const config = + typeof data.config === "object" && data.config !== null + ? deepSanitize( data.config ) + : {}; + + return { + id, + type: "workflow", + position, + data: { + type: nodeKind, + label, + notes, + config, + }, + }; +} + +function sanitizeEdge( edge: any ) { + if ( typeof edge !== "object" || edge === null ) { + throw new Error( "Invalid edge payload." ); + } + + const id = sanitizeString( edge.id, 128 ); + const source = sanitizeString( edge.source, 128 ); + const target = sanitizeString( edge.target, 128 ); + const type = sanitizeString( edge.type ?? "smoothstep", 32 ); + + if ( !id || !source || !target ) { + throw new Error( "Edges must include id, source, and target." ); + } + + return { id, source, target, type }; +} + +function sanitizeWorkflowPayload( nodes: any[], edges: any[] ) { + if ( !Array.isArray( nodes ) || nodes.length === 0 ) { + throw new Error( "Workflows require at least one node." ); + } + + if ( nodes.length > 150 ) { + throw new Error( "Workflow node limit exceeded (max 150)." ); + } + + if ( !Array.isArray( edges ) ) { + throw new Error( "Edges payload must be an array." ); + } + + if ( edges.length > 300 ) { + throw new Error( "Workflow edge limit exceeded (max 300)." ); + } + + const sanitizedNodes = nodes.map( sanitizeNode ); + const sanitizedEdges = edges.map( sanitizeEdge ); + + return { sanitizedNodes, sanitizedEdges }; +} + +export const list = query( { + args: {}, + handler: async ( ctx ) => { + const userScope = await getUserScope( ctx ); + return await ctx.db + .query( "workflows" ) + .withIndex( "by_user", ( q ) => q.eq( "userId", userScope ) ) + .order( "desc" ) + .take( 50 ); + }, +} ); + +export const get = query( { + args: { + workflowId: v.id( "workflows" ), + }, + handler: async ( ctx, args ) => { + const workflow = await ctx.db.get( args.workflowId ); + if ( !workflow ) { + return null; + } + const userScope = await getUserScope( ctx ); + if ( workflow.userId !== userScope ) { + throw new Error( "Workflow not found for current user" ); + } + return workflow; + }, +} ); + +// Internal query for Node.js actions to fetch workflows without user scope check +export const getInternal = internalQuery( { + args: { + workflowId: v.id( "workflows" ), + }, + handler: async ( ctx, args ) => { + return await ctx.db.get( args.workflowId ); + }, +} ); + +// Internal mutation to update workflow status from actions +export const updateStatusInternal = internalMutation( { + args: { + workflowId: v.id( "workflows" ), + status: v.string(), + }, + handler: async ( ctx, args ) => { + await ctx.db.patch( args.workflowId, { + status: args.status, + updatedAt: Date.now(), + } ); + }, +} ); + +export const save = mutation( { + args: { + workflowId: v.optional( v.id( "workflows" ) ), + name: v.string(), + nodes: v.array( v.any() ), + edges: v.array( v.any() ), + templateId: v.optional( v.string() ), + status: v.optional( v.string() ), + }, + handler: async ( ctx, args ) => { + const userScope = await getUserScope( ctx ); + const now = Date.now(); + const status = args.status ?? "draft"; + const templateId = args.templateId ?? "custom"; + const { sanitizedNodes, sanitizedEdges } = sanitizeWorkflowPayload( args.nodes, args.edges ); + + if ( args.workflowId ) { + const existing = await ctx.db.get( args.workflowId ); + if ( !existing ) { + throw new Error( "Workflow not found" ); + } + if ( existing.userId !== userScope ) { + throw new Error( "Workflow not found for current user" ); + } + await ctx.db.patch( args.workflowId, { + name: args.name, + nodes: sanitizedNodes, + edges: sanitizedEdges, + status, + updatedAt: now, + } ); + return { workflowId: args.workflowId }; + } + + const workflowId = await ctx.db.insert( "workflows", { + name: args.name, + userId: userScope, + templateId, + nodes: sanitizedNodes, + edges: sanitizedEdges, + status, + createdAt: now, + updatedAt: now, + } ); + + return { workflowId }; + }, +} ); + +export const remove = mutation( { + args: { + workflowId: v.id( "workflows" ), + }, + handler: async ( ctx, args ) => { + const existing = await ctx.db.get( args.workflowId ); + if ( !existing ) { + return { removed: false }; + } + const userScope = await getUserScope( ctx ); + if ( existing.userId !== userScope ) { + throw new Error( "Workflow not found for current user" ); + } + await ctx.db.delete( args.workflowId ); + return { removed: true }; + }, +} ); + +type AgentBlueprint = { + name: string; + description: string; + model: string; + modelProvider: string; + deploymentType: "aws" | "ollama"; + systemPrompt: string; + tools: any[]; +}; + +export const publishAsAgent = action( { + args: { + workflowId: v.id( "workflows" ), + agentName: v.optional( v.string() ), + description: v.optional( v.string() ), + }, + handler: async ( ctx, args ): Promise<{ agentId: Id<"agents">; workflowId: Id<"workflows"> }> => { + const userScope = await getUserScope( ctx ); + const workflow = await ctx.runQuery( internal.workflows.getInternal, { + workflowId: args.workflowId, + } ); + + if ( !workflow ) { + throw new Error( "Workflow not found" ); + } + + if ( workflow.userId !== userScope ) { + throw new Error( "You do not have access to this workflow" ); + } + + const blueprint = buildAgentBlueprint( { + workflow, + requestedName: args.agentName, + requestedDescription: args.description, + } ); + + const generation: { generatedCode: string; requirementsTxt: string; mcpConfig: string | null } = await ctx.runAction( api.codeGenerator.generateAgent, { + name: blueprint.name, + model: blueprint.model, + systemPrompt: blueprint.systemPrompt, + tools: blueprint.tools, + deploymentType: blueprint.deploymentType, + } ); + + const agentId: Id<"agents"> = await ctx.runMutation( api.agents.create, { + name: blueprint.name, + description: blueprint.description, + model: blueprint.model, + modelProvider: blueprint.modelProvider, + systemPrompt: blueprint.systemPrompt, + tools: blueprint.tools, + generatedCode: generation.generatedCode, + dockerConfig: "", + deploymentType: blueprint.deploymentType, + isPublic: false, + exposableAsMCPTool: false, + mcpToolName: "", + mcpInputSchema: undefined, + sourceWorkflowId: args.workflowId, + } as any ); + + await ctx.runMutation( internal.workflows.updateStatusInternal, { + workflowId: args.workflowId, + status: "published", + } ); + + return { + agentId, + workflowId: args.workflowId, + }; + }, +} ); + +function buildAgentBlueprint( params: { + workflow: any; + requestedName?: string | null; + requestedDescription?: string | null; +} ): AgentBlueprint { + const nodes = ( params.workflow.nodes || [] ) as WorkflowNode[]; + if ( !nodes.length ) { + throw new Error( "Workflow must include at least one node before publishing." ); + } + + const modelNode = nodes.find( ( node ) => node.data?.type === "Model" ); + if ( !modelNode ) { + throw new Error( "Add a Model node before generating an agent." ); + } + + const { model, modelProvider, deploymentType } = extractModelFromNode( modelNode ); + const systemPrompt = buildSystemPrompt( nodes ); + const toolSpecs = buildToolSpecs( nodes ); + + const name = ( params.requestedName?.trim() || params.workflow.name || "Visual Workflow Agent" ).slice( 0, 80 ); + const description = + params.requestedDescription?.trim() || + `Agent generated from visual workflow "${params.workflow.name}".`; + + return { + name, + description, + model, + modelProvider, + deploymentType, + systemPrompt, + tools: toolSpecs, + }; +} + +function extractModelFromNode( node: WorkflowNode ): { + model: string; + modelProvider: string; + deploymentType: "aws" | "ollama"; +} { + const config: any = node.data?.config || {}; + const model = config.modelId || config.model || "anthropic.claude-3-5-sonnet-20241022-v2:0"; + + let provider = ( config.provider || "" ).toLowerCase(); + if ( !provider ) { + // Bedrock IDs contain dots before the colon (e.g. "anthropic.claude-3-5-sonnet:0") + // Ollama IDs have no dots (e.g. "llama3:latest") + if ( model.includes( ":" ) ) { + const prefix = model.split( ":" )[0]; + provider = prefix.includes( "." ) ? "bedrock" : "ollama"; + } else { + provider = "bedrock"; + } + } + + const deploymentType: "aws" | "ollama" = provider === "ollama" ? "ollama" : "aws"; + const modelProvider = provider === "ollama" ? "ollama" : "bedrock"; + + return { model, modelProvider, deploymentType }; +} + +function buildSystemPrompt( nodes: WorkflowNode[] ): string { + const sections: string[] = []; + + const backgrounds = nodes + .filter( ( node ): node is WorkflowNode & { data: { type: "Background"; config: { text: string } } } => + node.data?.type === "Background" && "text" in ( node.data.config || {} ) ) + .map( ( node ) => node.data.config.text ); + if ( backgrounds.length ) { + sections.push( backgrounds.join( "\n\n" ) ); + } + + const systemSnippets = nodes + .filter( ( node ): node is WorkflowNode & { data: { type: "PromptText"; config: { role?: string; template: string } } } => + node.data?.type === "PromptText" ) + .filter( ( node ) => ( node.data.config.role || "system" ) === "system" ) + .map( ( node ) => node.data.config.template ) + .filter( Boolean ); + if ( systemSnippets.length ) { + sections.push( systemSnippets.join( "\n\n" ) ); + } + + // Prompt templates are the primary authored instructions in modern workflows. + const promptTemplates = nodes + .filter( ( node ) => node.data?.type === "Prompt" ) + .map( ( node ) => { + const config: any = node.data?.config || {}; + const role = typeof config.role === "string" ? config.role : "system"; + const template = typeof config.template === "string" ? config.template.trim() : ""; + return { role, template }; + } ) + .filter( ( item ) => item.template.length > 0 ); + + const systemPromptTemplates = promptTemplates + .filter( ( item ) => item.role === "system" ) + .map( ( item ) => item.template ); + if ( systemPromptTemplates.length ) { + sections.push( systemPromptTemplates.join( "\n\n" ) ); + } else if ( promptTemplates.length ) { + // Fallback to non-system templates if no explicit system prompt exists. + sections.push( promptTemplates.map( ( item ) => item.template ).join( "\n\n" ) ); + } + + const promptNode = nodes.find( ( node ) => node.data?.type === "Prompt" ); + const promptNotes = promptNode?.data?.notes || ""; + if ( promptNotes ) { + sections.push( promptNotes ); + } + + const combined = sections.join( "\n\n" ).trim(); + return combined || "You are a helpful assistant. Think step-by-step and call tools when they improve accuracy."; +} + +function buildToolSpecs( nodes: WorkflowNode[] ) { + const specs = new Map(); + + nodes + .filter( ( node ) => node.data?.type === "Tool" ) + .forEach( ( node ) => { + const config: any = node.data?.config || {}; + const normalizedName = normalizeToolName( config.name || node.data?.label || node.id ); + const metadata = findToolMetadata( normalizedName ); + const pipPackages = new Set(); + + if ( metadata?.basePip ) { + pipPackages.add( metadata.basePip ); + } + ( metadata?.additionalPipPackages || [] ).forEach( ( pkg ) => pipPackages.add( pkg ) ); + + const spec = { + name: metadata?.name || normalizedName || "custom_tool", + type: config.kind || "internal", + config: { + description: + metadata?.description || + node.data?.notes || + node.data?.label || + "Workflow tool generated from visual scripting.", + parameters: buildParameterMetadata( config.args ), + }, + requiresPip: pipPackages.size > 0 || Boolean( metadata?.extrasPip ), + pipPackages: pipPackages.size ? Array.from( pipPackages ) : undefined, + extrasPip: metadata?.extrasPip, + notSupportedOn: metadata?.notSupportedOn, + }; + + specs.set( spec.name, spec ); + } ); + + return Array.from( specs.values() ); +} + +function buildParameterMetadata( args: Record | undefined ) { + if ( !args || typeof args !== "object" ) { + return []; + } + + return Object.entries( args ).slice( 0, 10 ).map( ( [name, value] ) => ( { + name, + type: typeof value, + description: `Auto-generated parameter for ${name}`, + required: true, + } ) ); +} diff --git a/generateKeys.mjs b/generateKeys.mjs new file mode 100644 index 0000000..0e2e8b6 --- /dev/null +++ b/generateKeys.mjs @@ -0,0 +1,15 @@ +import { exportJWK, exportPKCS8, generateKeyPair } from "jose"; + +const keys = await generateKeyPair("RS256", { + extractable: true, +}); +const privateKey = await exportPKCS8(keys.privateKey); +const publicKey = await exportJWK(keys.publicKey); +const jwks = JSON.stringify({ keys: [{ use: "sig", ...publicKey }] }); + +process.stdout.write( + `JWT_PRIVATE_KEY="${privateKey.trimEnd().replace(/\n/g, " ")}"`, +); +process.stdout.write("\n"); +process.stdout.write(`JWKS=${jwks}`); +process.stdout.write("\n"); \ No newline at end of file diff --git a/index.html b/index.html index f587b5c..4955ed7 100644 --- a/index.html +++ b/index.html @@ -5,7 +5,7 @@ - Chef + AI Forge diff --git a/mydiagram.py b/mydiagram.py index 3e5a46c..e69de29 100644 --- a/mydiagram.py +++ b/mydiagram.py @@ -1 +0,0 @@ -I asked you about the mcp implementation, please let the fucking other bot handle that fucked file. \ No newline at end of file diff --git a/package.json b/package.json index 1eb40a4..a1c8f2f 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "type": "module", "scripts": { "dev": "npm-run-all --parallel dev:frontend dev:backend", - "dev:frontend": "vite --open", + "dev:frontend": "vite --port=4000 --open", "dev:backend": "convex dev", "build": "vite build", "lint": "tsc -p convex -noEmit --pretty false && tsc -p . -noEmit --pretty false && convex dev --once && vite build", @@ -27,9 +27,10 @@ "@aws-sdk/client-ecr": "^3.913.0", "@aws-sdk/client-ecs": "^3.701.0", "@aws-sdk/client-lambda": "^3.913.0", - "@aws-sdk/client-s3": "^3.701.0", + "@aws-sdk/client-polly": "^3.917.0", + "@aws-sdk/client-s3": "^3.917.0", "@aws-sdk/client-sts": "^3.911.0", - "@aws-sdk/s3-request-presigner": "^3.910.0", + "@aws-sdk/s3-request-presigner": "^3.917.0", "@convex-dev/auth": "^0.0.80", "@modelcontextprotocol/sdk": "^1.20.1", "@types/react-syntax-highlighter": "^15.5.13", @@ -41,7 +42,9 @@ "react": "^19.0.0", "react-dom": "^19.0.0", "react-syntax-highlighter": "^15.6.6", + "reactflow": "^11.11.4", "sonner": "^2.0.3", + "stripe": "^20.3.1", "tailwind-merge": "^3.1.0" }, "devDependencies": { diff --git a/public/agentforge_logo400.png b/public/agentforge_logo400.png new file mode 100644 index 0000000..8dcdcc6 Binary files /dev/null and b/public/agentforge_logo400.png differ diff --git a/requirements.txt b/requirements.txt index ec71198..c7900f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,14 @@ # ============================================================================ # CORE AGENT RUNTIME (AWS Bedrock AgentCore) # ============================================================================ -# NOTE: agent-core-code-interpreter extra removed - it pins bedrock-agentcore==0.1.0 -# The code_interpreter tool is available in base strands-agents-tools (no extra needed) -strands-agents-tools[a2a-client,local-chromium-browser,mem0-memory,diagram,rss,use-computer]>=0.2.12 -strands-agents>=1.0.0 +# Strands Agents - Core framework +# NOTE: These packages are private and break public builds. +# Install them manually from the internal registry when available. +# strandsagents>=1.0.0 +# strands-agents>=1.0.0 + +# Strands Tools - All extras for comprehensive tool support +strands-agents-tools[a2a-client,local-chromium-browser,mem0-memory,diagram,rss,use-computer,agent-core-memory,agent-core-browser]>=0.2.12 # Bedrock AgentCore stack - latest versions bedrock-agentcore>=0.1.6 @@ -63,13 +67,37 @@ uvicorn[standard]>=0.27.0 # Pydantic for data validation pydantic>=2.5.0 -# OpenTelemetry for X-Ray tracing (on-demand logging) +# ============================================================================ +# SECURITY +# ============================================================================ +# Cryptography for secure operations +cryptography>=44.0.1 + +# Input validation and sanitization +bleach>=6.1.0 + +# Rate limiting +slowapi>=0.1.9 + +# Security headers +secure>=0.3.0 + +# ============================================================================ +# OBSERVABILITY (OpenTelemetry + AWS X-Ray) +# ============================================================================ +# OpenTelemetry for distributed tracing (vendor-neutral) opentelemetry-api>=1.22.0 opentelemetry-sdk>=1.22.0 opentelemetry-instrumentation-fastapi>=0.43b0 + +# AWS X-Ray SDK and OTEL exporter aws-xray-sdk>=2.12.0 +opentelemetry-sdk-extension-aws>=2.0.1 -# Logging +# OTEL exports to: AWS X-Ray (via opentelemetry-sdk-extension-aws) +# View traces in: AWS Console → X-Ray → Service Map + +# Logging (with security audit support) python-json-logger>=2.0.7 # Runtime helpers pulled in transitively; keep pinned explicitly @@ -83,3 +111,9 @@ python-dateutil>=2.8.2 # Browser automation runtime (required by AgentCore Browser quickstart) playwright>=1.47 nest-asyncio>=1.6 + +# Ollama for local model support (Tier 2 deployments) +ollama>=0.6.0 + +# UTCP for agent communication +strands-utcp>=0.1.0 diff --git a/src/App.tsx b/src/App.tsx index 2309a4c..66a4eba 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -10,12 +10,14 @@ import { ErrorLogsPanel } from "./components/ErrorLogsPanel"; import { AuditLogsPanel } from "./components/AuditLogsPanel"; import { AIAgentBuilder } from "./components/AIAgentBuilder"; import { InterleavedChat } from "./components/InterleavedChat"; +import { VisualScriptingBuilder } from "./components/VisualScriptingBuilder"; +import { PricingPanel } from "./components/PricingPanel"; import { useState } from "react"; -import { Bot, Home, Server, AlertCircle, FileText, Sparkles, MessageSquare } from "lucide-react"; +import { Bot, Home, Server, AlertCircle, FileText, Sparkles, MessageSquare, GitBranch, CreditCard } from "lucide-react"; import { BuilderAutomationProvider } from "./context/BuilderAutomationContext"; export default function App() { - const [currentView, setCurrentView] = useState<"dashboard" | "builder" | "aiBuilder" | "chat" | "mcp" | "errors" | "audit" | "settings">("dashboard"); + const [currentView, setCurrentView] = useState<"dashboard" | "builder" | "aiBuilder" | "chat" | "mcp" | "errors" | "audit" | "settings" | "visualScripting">("dashboard"); return ( @@ -28,14 +30,14 @@ export default function App() { Agent Builder - + @@ -132,14 +156,14 @@ export default function App() { - + ); } -function Content({ currentView, onNavigate }: { currentView: string; onNavigate: (view: "dashboard" | "builder" | "aiBuilder" | "chat" | "mcp" | "errors" | "audit" | "settings") => void }) { +function Content({ currentView, onNavigate }: { currentView: string; onNavigate: (view: "dashboard" | "builder" | "aiBuilder" | "chat" | "mcp" | "errors" | "audit" | "settings" | "visualScripting") => void }) { const loggedInUser = useQuery(api.auth.loggedInUser); if (loggedInUser === undefined) { @@ -166,6 +190,10 @@ function Content({ currentView, onNavigate }: { currentView: string; onNavigate: return ; case "audit": return ; + case "visualScripting": + return ; + case "settings": + return ; case "dashboard": default: return ; diff --git a/src/SignInForm.tsx b/src/SignInForm.tsx index cdc2d92..178e763 100644 --- a/src/SignInForm.tsx +++ b/src/SignInForm.tsx @@ -42,6 +42,7 @@ export function SignInForm() { name="email" placeholder="Email" required + autoComplete="email" /> Sign in with GitHub -