diff --git a/output.txt b/output.txt
new file mode 100644
index 000000000..2e035484c
--- /dev/null
+++ b/output.txt
@@ -0,0 +1,118 @@
+# Context Crunching in Agent Farm
+
+## Overview
+
+Context crunching is a mechanism used in the agent_farm project to manage the context window size when the LLM is processing a large amount of information. It prevents the context window from growing too large, which could lead to performance issues or token limit errors.
+
+## Toggle for Reasoning
+
+The "toggle for reasoning" is a feature that can be enabled or disabled to control whether the agent uses a multi-step reasoning process. When enabled:
+
+1. The agent breaks down complex tasks into smaller, more manageable steps
+2. It maintains a plan and hands out tasks to a simulated "junior engineer"
+3. It uses O3 (a specific LLM model) for orchestration
+
+This feature is controlled by environment variables and user settings:
+
+```rust
+let reasoning = if whoami::username() == "skcd".to_owned()
+    || whoami::username() == "root".to_owned()
+    || std::env::var("SIDECAR_ENABLE_REASONING").map_or(false, |v| !v.is_empty())
+{
+    reasoning
+} else {
+    // gate hard for now before we push a new version of the editor
+    false
+}
+```
+
+## Action Node Compression
+
+Action nodes represent steps taken by the agent during its execution. As these accumulate, they can consume a significant portion of the context window. The context crunching process compresses these action nodes by the following steps (a minimal sketch appears after the list):
+
+1. Identifying when the input tokens exceed a threshold (60,000 tokens for standard LLMs, 120,000 for custom LLMs)
+2. Finding the last reasoning node as a starting point
+3. Summarizing the work done so far
+4. Creating a new starting point with the compressed context
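+
+The sketch below illustrates that flow; `ActionNode`, `compress_action_nodes`, and the threshold handling are hypothetical stand-ins for illustration, not the actual sidecar API:
+
+```rust
+// Hypothetical sketch of the compression trigger described above.
+struct ActionNode {
+    is_reasoning: bool,
+    content: String,
+}
+
+fn compress_action_nodes(
+    nodes: &[ActionNode],
+    input_tokens: usize,
+    threshold: usize,
+) -> Option<String> {
+    if input_tokens < threshold {
+        return None; // context still fits, nothing to compress
+    }
+    // Find the last reasoning node to use as the starting point
+    let start = nodes.iter().rposition(|n| n.is_reasoning).unwrap_or(0);
+    // Gather everything from that point on; the real system hands this
+    // to the LLM to be summarized into a new, compact starting context
+    let summary = nodes[start..]
+        .iter()
+        .map(|n| n.content.as_str())
+        .collect::<Vec<_>>()
+        .join("\n");
+    Some(summary)
+}
+```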
+
+## O3 Usage
+
+The system uses a specific model called "O3MiniHigh" for context crunching and reasoning:
+
+```rust
+let llm_properties = match &self.context_crunching_llm {
+    Some(props) => props.clone(),
+    None => LLMProperties::new(
+        LLMType::O3MiniHigh,
+        llm_client::provider::LLMProvider::OpenAI,
+        llm_client::provider::LLMProviderAPIKeys::OpenAI(OpenAIProvider::new(
+            std::env::var("OPENAI_API_KEY").expect("env var to be present"),
+        )),
+    ),
+};
+```
+
+This model is specifically chosen for its ability to efficiently summarize and reason about the agent's progress.
+
+## Context Crunching Process
+
+The context crunching process follows these steps (a sketch of the resulting output appears after the list):
+
+1. **Trigger Detection**: In `agent_loop()` within `session/service.rs`, the system checks if the input tokens exceed the threshold
+2. **Context Collection**: It gathers:
+   - The original user instruction
+   - All action nodes since the last reasoning node
+   - Previous reasoning nodes for continuity
+3. **Summarization**: The `context_crunching()` method in `ToolUseAgent` is called with this context
+4. **LLM Processing**: The O3MiniHigh model processes this information with a specialized prompt
+5. **Output Generation**: The LLM generates:
+   - A summary of work done so far
+   - A revised instruction that maintains the original intent
+6. **Context Reset**: The session is reset with this new compressed context, and execution continues
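+
+The output of step 5 might be shaped like this (a sketch; the struct and helper are assumptions, not the actual sidecar types):
+
+```rust
+// Illustrative shape of the crunching output described in step 5.
+struct ContextCrunchingOutput {
+    /// Summary of the work completed so far
+    summary: String,
+    /// Revised instruction that preserves the original intent
+    revised_instruction: String,
+}
+
+// Step 6: build the compressed context the session resumes from
+fn compressed_context(output: &ContextCrunchingOutput) -> String {
+    format!(
+        "Work so far:\n{}\n\nContinue with: {}",
+        output.summary, output.revised_instruction
+    )
+}
+```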
+
+## When Context Crunching is Triggered
+
+Context crunching is triggered when:
+
+1. The context_crunching flag is enabled (controlled by username or environment variable)
+2. The input tokens exceed the threshold (60k for standard LLMs, 120k for custom LLMs)
+3. There are action nodes that can be compressed
+
+The relevant code in `session/service.rs`:
+
+```rust
+if context_crunching {
+    if let Some(input_tokens) = session
+        .action_nodes()
+        .last()
+        .map(|action_node| action_node.get_llm_usage_statistics())
+        .flatten()
+        .map(|llm_stats| {
+            llm_stats.input_tokens().unwrap_or_default()
+                + llm_stats.cached_input_tokens().unwrap_or_default()
+        })
+    {
+        // For custom LLMs, we use a higher token threshold
+        let token_threshold = if message_properties.llm_properties().llm().is_custom() {
+            120_000
+        } else {
+            60_000
+        };
+        if input_tokens >= token_threshold {
+            println!("context_crunching");
+            // ... context crunching logic ...
+        }
+    }
+}
+```
+
+## The Flow from tool_use Endpoint to Context Crunching
+
+The request passes through these stages (a self-contained sketch follows the list):
+
+1. **Entry Point**: The process starts at the `agent_tool_use` endpoint in `agentic.rs`
+2. **Session Service**: The request is passed to `tool_use_agentic()` in `session/service.rs`
+3. **Agent Loop**: Within the agent loop, the system checks if context crunching is needed
+4. **Context Crunching**: If triggered, `context_crunching()` is called on the `ToolUseAgent`
+5. **Processing**: The O3MiniHigh model processes the context and generates a summary
+6. **Session Reset**: The session is reset with the new compressed context
+7. **Continuation**: The agent continues execution with the compressed context
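+
+A minimal, self-contained sketch of stages 3-6, assuming simplified stand-in types (the real `Session` and crunching call are far larger than this):
+
+```rust
+// Simplified sketch of the loop described above; all names, the
+// threshold handling, and the token estimate are illustrative only.
+struct Session {
+    input_tokens: usize,
+    context: String,
+}
+
+fn context_crunching(context: &str) -> String {
+    // Stand-in for the O3MiniHigh summarization call
+    format!("summary of work so far: {}", context)
+}
+
+fn agent_loop_step(session: &mut Session, token_threshold: usize) {
+    if session.input_tokens >= token_threshold {
+        // Crunch: replace the long context with a compact summary
+        session.context = context_crunching(&session.context);
+        session.input_tokens = session.context.len() / 4; // rough token estimate
+    }
+    // ... continue the normal tool-use iteration with the reset context
+}
+```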
+
+This entire process ensures that the agent can handle long-running tasks without exceeding token limits, while maintaining the coherence and intent of the original instruction.
\ No newline at end of file
diff --git a/sidecar/src/webserver/agentic.rs b/sidecar/src/webserver/agentic.rs
index ca8a47e40..4c45a31c1 100644
--- a/sidecar/src/webserver/agentic.rs
+++ b/sidecar/src/webserver/agentic.rs
@@ -39,6 +39,7 @@
 use crate::chunking::text_document::Range;
 use crate::repo::types::RepoRef;
 use crate::webserver::plan::{
     check_plan_storage_path, check_scratch_pad_path, plan_storage_directory,
 };
+use crate::webserver::message_moderation;
 use crate::{application::application::Application, user_context::types::UserContext};
@@ -1175,6 +1176,26 @@ pub async fn agent_session_chat(
         "webserver::agent_session::chat::session_id({})",
         &session_id
     );
+
+    // Check if the message contains inappropriate content
+    let original_query = query.clone();
+    let query = if message_moderation::contains_inappropriate_content(&query) {
+        println!("Filtering inappropriate content in message");
+        // If the message should be blocked entirely, replace it with a warning
+        if message_moderation::should_block_message(&query) {
+            "Your message was blocked due to inappropriate content. Please maintain professional communication.".to_string()
+        } else {
+            // Otherwise, filter out the inappropriate words
+            message_moderation::filter_inappropriate_content(&query)
+        }
+    } else {
+        query
+    };
+
+    // Log if the message was modified
+    if original_query != query {
+        println!("Message was modified due to inappropriate content");
+    }
     let cancellation_token = tokio_util::sync::CancellationToken::new();
     let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
     let message_properties = SymbolEventMessageProperties::new(
@@ -1708,7 +1729,26 @@ pub async fn agent_tool_use(
+    // Check if the message contains inappropriate content
+    let original_query = query.clone();
+    let query = if message_moderation::contains_inappropriate_content(&query) {
+        println!("Filtering inappropriate content in message");
+        // If the message should be blocked entirely, replace it with a warning
+        if message_moderation::should_block_message(&query) {
+            "Your message was blocked due to inappropriate content. Please maintain professional communication.".to_string()
+        } else {
+            // Otherwise, filter out the inappropriate words
+            message_moderation::filter_inappropriate_content(&query)
+        }
+    } else {
+        query
+    };
+
+    // Log if the message was modified
+    if original_query != query {
+        println!("Message was modified due to inappropriate content");
+    }
     let reasoning = if whoami::username() == "skcd".to_owned()
         || whoami::username() == "root".to_owned()
         || std::env::var("SIDECAR_ENABLE_REASONING").map_or(false, |v| !v.is_empty())
     {
         reasoning
     } else {
         // gate hard for now before we push a new verwsion of the editor
diff --git a/sidecar/src/webserver/message_moderation.rs b/sidecar/src/webserver/message_moderation.rs
new file mode 100644
index 000000000..cecff75c4
--- /dev/null
+++ b/sidecar/src/webserver/message_moderation.rs
@@ -0,0 +1,126 @@
+//! Contains functionality for moderating chat messages to filter out inappropriate content
+
+use std::collections::HashSet;
+use once_cell::sync::Lazy;
+
+/// A static set of words that are considered inappropriate and should be filtered
+static INAPPROPRIATE_WORDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
+    let mut set = HashSet::new();
+    // Common profanity and offensive terms
+    set.insert("fuck");
+    set.insert("shit");
+    set.insert("ass");
+    set.insert("bitch");
+    set.insert("damn");
+    set.insert("cunt");
+    set.insert("dick");
+    set.insert("cock");
+    set.insert("pussy");
+    // Add more words as needed
+    set
+});
+
+/// Checks if a message contains inappropriate content
+///
+/// # Arguments
+///
+/// * `message` - The message to check
+///
+/// # Returns
+///
+/// `true` if the message contains inappropriate content, `false` otherwise
+pub fn contains_inappropriate_content(message: &str) -> bool {
+    let lowercase_message = message.to_lowercase();
+
+    // Check if any inappropriate word is in the message
+    INAPPROPRIATE_WORDS.iter().any(|&word| lowercase_message.contains(word))
+}
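+
+// Note: `filter_inappropriate_content` below indexes into the original
+// string using match positions found in a lowercased snapshot. Every
+// replacement is the same length as the word it masks, so the snapshot
+// stays aligned for ASCII text; characters whose lowercase form has a
+// different byte length (e.g. 'İ') could throw the offsets off.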
assert_eq!(filter_inappropriate_content("This is a clean test"), "This is a clean test"); + } + + #[test] + fn test_should_block_message() { + assert!(should_block_message("fuck you")); + assert!(should_block_message("fuck shit damn")); + assert!(!should_block_message("This is a test with one inappropriate word: fuck")); + assert!(!should_block_message("This is a clean test")); + } +} \ No newline at end of file diff --git a/sidecar/src/webserver/mod.rs b/sidecar/src/webserver/mod.rs index f77f79f1c..97a5222cf 100644 --- a/sidecar/src/webserver/mod.rs +++ b/sidecar/src/webserver/mod.rs @@ -8,7 +8,8 @@ pub mod health; pub mod in_line_agent; pub mod in_line_agent_stream; pub mod inline_completion; +pub mod message_moderation; pub mod model_selection; pub(crate) mod plan; pub mod tree_sitter; -pub mod types; +pub mod types; \ No newline at end of file