def suppress_local_only(self) -> bool:
    """Return whether prompts should restrict findings to REMOTE attackers.

    The "local users have access, only flag remote" framing fits a CLI or
    library whose inputs are all operator-controlled. A data-processing
    library (parser, deserializer, codec) instead consumes UNTRUSTED INPUT
    DATA, and that data crossing into the code is the attack surface even
    with no network listener. Gating on ``requires_remote_trigger`` alone
    would suppress exactly those bugs, so this also consults the
    already-captured ``trust_boundaries``.

    Returns:
        ``False`` when ``requires_remote_trigger`` is truthy, or when any
        trust level found in ``trust_boundaries`` is ``untrusted``;
        ``True`` otherwise (including when no boundaries are declared).
    """
    if self.requires_remote_trigger:
        return False

    def _levels(value):
        # trust_boundaries is LLM-generated, so tolerate shapes other than
        # the expected flat ``{source: level}`` mapping (a bare list of
        # levels, or a level nested one layer down) instead of crashing on
        # ``.values()`` or silently missing an ``untrusted`` marker.
        if isinstance(value, dict):
            for item in value.values():
                yield from _levels(item)
        elif isinstance(value, (list, tuple, set, frozenset)):
            for item in value:
                yield from _levels(item)
        else:
            yield value

    # Whitespace- and case-insensitive: generated values may deviate from the
    # schema's lowercase 'untrusted' (e.g. 'Untrusted', ' untrusted\n').
    return not any(
        str(level).strip().casefold() == "untrusted"
        for level in _levels(self.trust_boundaries or {})
    )
+- `requires_remote_trigger`: Set to `true` for web_app, AND for any cli_tool/library/agent_framework that PROCESSES UNTRUSTED INPUT DATA (a parser, deserializer, codec, file/format reader, or anything where `trust_boundaries` marks an input source `untrusted` — the untrusted data crossing into the code is the attack surface even with no network listener). Set to `false` only when every input source is operator-controlled/trusted. - `confidence`: 0.0-1.0 based on how much information was available. - Be specific in `not_a_vulnerability` - these will directly prevent false positives. """ @@ -616,7 +636,7 @@ def format_context_for_prompt(context: ApplicationContext) -> str: lines.append(f"- {item}") lines.append("") - if not context.requires_remote_trigger: + if context.suppress_local_only(): lines.append("**IMPORTANT:** This is a CLI tool/library. Users running this code have local access.") lines.append("Only flag vulnerabilities that could be exploited by a REMOTE attacker, not by local users.") lines.append("") diff --git a/libs/openant-core/prompts/verification_prompts.py b/libs/openant-core/prompts/verification_prompts.py index a0b10978..fde990f3 100644 --- a/libs/openant-core/prompts/verification_prompts.py +++ b/libs/openant-core/prompts/verification_prompts.py @@ -27,7 +27,7 @@ def get_verification_system_prompt(app_context: "ApplicationContext" = None) -> """ base_prompt = VERIFICATION_SYSTEM_PROMPT - if app_context and not app_context.requires_remote_trigger: + if app_context and app_context.suppress_local_only(): base_prompt += """ IMPORTANT: This is a CLI tool or library. The user running this code has local filesystem access. @@ -66,7 +66,7 @@ def format_app_context_for_verification(app_context: "ApplicationContext") -> st lines.append(f"- {item}") lines.append("") - if not app_context.requires_remote_trigger: + if app_context.suppress_local_only(): lines.append("**CRITICAL:** This is a CLI tool/library. 
Users have local filesystem access.") lines.append("A vulnerability requires a REMOTE attacker to exploit it.") lines.append("If the 'attack' requires running CLI commands locally, it's NOT a vulnerability.") @@ -125,7 +125,7 @@ def get_verification_prompt( ```""" # Adjust attacker description based on app context - if app_context and not app_context.requires_remote_trigger: + if app_context and app_context.suppress_local_only(): attacker_description = """You are an attacker on the internet. You have a browser and nothing else. No server access, no admin credentials, no ability to modify files on the server, and NO ABILITY TO RUN CLI COMMANDS. diff --git a/libs/openant-core/prompts/vulnerability_analysis.py b/libs/openant-core/prompts/vulnerability_analysis.py index 3279c631..04a2862c 100644 --- a/libs/openant-core/prompts/vulnerability_analysis.py +++ b/libs/openant-core/prompts/vulnerability_analysis.py @@ -58,7 +58,7 @@ def format_app_context_for_prompt(app_context: "ApplicationContext") -> str: lines.append(f"- {item}") lines.append("") - if not app_context.requires_remote_trigger: + if app_context.suppress_local_only(): lines.append("**IMPORTANT:** This is a CLI tool/library. Users running this code have local access.") lines.append("Only flag vulnerabilities that could be exploited by a REMOTE attacker, not by local users.") lines.append("") @@ -141,7 +141,7 @@ def get_analysis_prompt( >>> END OF TARGET FUNCTION <<<""" # Build the appropriate questions based on whether we have app context - if app_context and not app_context.requires_remote_trigger: + if app_context and app_context.suppress_local_only(): input_question = """2. **Where does input come from and who controls it?** - Remote attackers (HTTP requests, external APIs)? → potential concern - Local users running the CLI/library? 
→ NOT an attack vector (they have local access) @@ -213,7 +213,7 @@ def get_system_prompt(app_context: "ApplicationContext" = None) -> str: """ base_prompt = STAGE1_SYSTEM_PROMPT - if app_context and not app_context.requires_remote_trigger: + if app_context and app_context.suppress_local_only(): base_prompt += """ IMPORTANT: This is a CLI tool or library. The user running this code has local filesystem access. diff --git a/libs/openant-core/tests/test_threat_model_untrusted_input_gate.py b/libs/openant-core/tests/test_threat_model_untrusted_input_gate.py new file mode 100644 index 00000000..8d55def4 --- /dev/null +++ b/libs/openant-core/tests/test_threat_model_untrusted_input_gate.py @@ -0,0 +1,83 @@ +"""Fix A — threat-model gate: don't suppress untrusted-input bugs for data libraries. + +`format_app_context_for_prompt` (Stage 1) and `format_app_context_for_verification` +(Stage 2) historically emitted a "this is a CLI tool/library, only flag REMOTE +attackers, not local users" suppression block whenever `requires_remote_trigger` +was False — which the generator sets for EVERY library. That discards exactly the +bug class of a parser/deserializer/codec, whose untrusted INPUT DATA is the attack +surface even with no network listener (the tree-sitter case). + +Fix: gate the suppression on `ApplicationContext.suppress_local_only()`, which keeps +the clause only when NO trust boundary is `untrusted`. A library that ingests +untrusted data (`source_code_being_parsed: untrusted`) is therefore analysed, while +a genuine no-attack-surface library (all-trusted) is unchanged — no new field, reuses +the already-captured `trust_boundaries`. 
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parents[1]))  # libs/openant-core

from context.application_context import ApplicationContext  # noqa: E402
from prompts.vulnerability_analysis import format_app_context_for_prompt  # noqa: E402
from prompts.verification_prompts import format_app_context_for_verification  # noqa: E402

# Sentinels that appear ONLY inside the suppression block of each formatter.
_STAGE1_SUPPRESSION = "have local access"  # vulnerability_analysis.py
_STAGE2_SUPPRESSION = "local filesystem access"  # verification_prompts.py


def _lib(trust_boundaries, *, remote=False):
    """Build a ``library`` context with the given boundaries and trigger flag."""
    fields = {
        "application_type": "library",
        "purpose": "x",
        "trust_boundaries": trust_boundaries,
        "requires_remote_trigger": remote,
    }
    return ApplicationContext(**fields)


# --- the method itself --------------------------------------------------------
def test_suppress_when_all_trusted():
    """All boundaries trusted, no remote trigger: suppression stays on."""
    ctx = _lib({"cli_args": "trusted", "config": "trusted"})
    assert ctx.suppress_local_only() is True


def test_do_not_suppress_when_any_untrusted():
    """A single untrusted boundary is enough to lift the suppression."""
    ctx = _lib({"source_code_being_parsed": "untrusted", "config": "trusted"})
    assert ctx.suppress_local_only() is False


def test_untrusted_match_is_case_insensitive():
    """trust_boundaries values are LLM-generated; tolerate case deviation."""
    title_case = _lib({"input": "Untrusted"})
    upper_case = _lib({"input": "UNTRUSTED"})
    assert title_case.suppress_local_only() is False
    assert upper_case.suppress_local_only() is False


def test_do_not_suppress_when_remote():
    """web_app-style: a remote trigger always means no local-only suppression."""
    ctx = _lib({"cli_args": "trusted"}, remote=True)
    assert ctx.suppress_local_only() is False


def test_empty_boundaries_still_suppress():
    """No declared boundaries on a local-only library keeps the suppression."""
    ctx = _lib({})
    assert ctx.suppress_local_only() is True


# --- Stage 1 formatter --------------------------------------------------------
def test_stage1_all_trusted_keeps_suppression():
    """Stage 1 output still carries the suppression text when all is trusted."""
    rendered = format_app_context_for_prompt(_lib({"cli_args": "trusted"}))
    assert _STAGE1_SUPPRESSION in rendered


def test_stage1_untrusted_input_drops_suppression():
    """Stage 1 output omits the suppression text once an input is untrusted."""
    ctx = _lib({"source_code_being_parsed": "untrusted"})
    rendered = format_app_context_for_prompt(ctx)
    assert _STAGE1_SUPPRESSION not in rendered


# --- Stage 2 formatter --------------------------------------------------------
def test_stage2_all_trusted_keeps_suppression():
    """Stage 2 output still carries the suppression text when all is trusted."""
    rendered = format_app_context_for_verification(_lib({"cli_args": "trusted"}))
    assert _STAGE2_SUPPRESSION in rendered


def test_stage2_untrusted_input_drops_suppression():
    """Stage 2 output omits the suppression text once an input is untrusted."""
    ctx = _lib({"source_code_being_parsed": "untrusted"})
    rendered = format_app_context_for_verification(ctx)
    assert _STAGE2_SUPPRESSION not in rendered