knostic · gadievron · Jun 12, 2026
@@ -145,6 +145,26 @@ def get_type_info(self) -> dict:
         """Get detailed information about this application type."""
         return APPLICATION_TYPE_INFO.get(self.application_type, {})
 
+    def suppress_local_only(self) -> bool:
+        """Return True when prompts should restrict findings to REMOTE attackers.
+
+        The "local users have access, only flag remote" framing fits a CLI/library
+        whose inputs are all operator-controlled. A data-processing library (parser,
+        deserializer, codec) instead consumes UNTRUSTED INPUT DATA, and that data
+        crossing into the code IS the attack surface even with no network listener.
+        Gating on ``requires_remote_trigger`` alone would suppress exactly those
+        bugs, so any ``trust_boundaries`` value of ``untrusted`` disables suppression.
+
+        Returns False when ``requires_remote_trigger`` is truthy or any boundary is
+        ``untrusted``; True otherwise (including empty or missing boundaries).
+        """
+        if self.requires_remote_trigger:
+            return False
+        # Values are LLM-generated: tolerate case AND stray whitespace deviations
+        # from the schema's 'untrusted' (e.g. 'Untrusted', ' untrusted ').
+        levels = (self.trust_boundaries or {}).values()
+        return not any(str(level).strip().lower() == "untrusted" for level in levels)
+
 
 # Files to check for manual override (in order of priority)
 MANUAL_OVERRIDE_FILES = [
@@ -460,7 +480,7 @@ def _build_type_descriptions() -> str:
 
 **Guidelines:**
 - `application_type`: MUST be one of: web_app, cli_tool, library, agent_framework, unsupported
-- `requires_remote_trigger`: Set to `false` for cli_tool, library, agent_framework. Set to `true` for web_app.
+- `requires_remote_trigger`: Set to `true` for web_app, AND for any cli_tool/library/agent_framework that PROCESSES UNTRUSTED INPUT DATA (a parser, deserializer, codec, file/format reader, or anything where `trust_boundaries` marks an input source `untrusted` — the untrusted data crossing into the code is the attack surface even with no network listener). Set to `false` only when every input source is operator-controlled/trusted.
 - `confidence`: 0.0-1.0 based on how much information was available.
 - Be specific in `not_a_vulnerability` - these will directly prevent false positives.
 """
@@ -616,7 +636,7 @@ def format_context_for_prompt(context: ApplicationContext) -> str:
             lines.append(f"- {item}")
         lines.append("")
 
-    if not context.requires_remote_trigger:
+    if context.suppress_local_only():
         lines.append("**IMPORTANT:** This is a CLI tool/library. Users running this code have local access.")
         lines.append("Only flag vulnerabilities that could be exploited by a REMOTE attacker, not by local users.")
         lines.append("")

@@ -27,7 +27,7 @@ def get_verification_system_prompt(app_context: "ApplicationContext" = None) ->
     """
     base_prompt = VERIFICATION_SYSTEM_PROMPT
 
-    if app_context and not app_context.requires_remote_trigger:
+    if app_context and app_context.suppress_local_only():
         base_prompt += """
 
 IMPORTANT: This is a CLI tool or library. The user running this code has local filesystem access.
@@ -66,7 +66,7 @@ def format_app_context_for_verification(app_context: "ApplicationContext") -> st
             lines.append(f"- {item}")
         lines.append("")
 
-    if not app_context.requires_remote_trigger:
+    if app_context.suppress_local_only():
         lines.append("**CRITICAL:** This is a CLI tool/library. Users have local filesystem access.")
         lines.append("A vulnerability requires a REMOTE attacker to exploit it.")
         lines.append("If the 'attack' requires running CLI commands locally, it's NOT a vulnerability.")
@@ -125,7 +125,7 @@ def get_verification_prompt(
 ```"""
 
     # Adjust attacker description based on app context
-    if app_context and not app_context.requires_remote_trigger:
+    if app_context and app_context.suppress_local_only():
         attacker_description = """You are an attacker on the internet. You have a browser and nothing else.
 No server access, no admin credentials, no ability to modify files on the server, and NO ABILITY TO RUN CLI COMMANDS.
 

@@ -58,7 +58,7 @@ def format_app_context_for_prompt(app_context: "ApplicationContext") -> str:
             lines.append(f"- {item}")
         lines.append("")
 
-    if not app_context.requires_remote_trigger:
+    if app_context.suppress_local_only():
         lines.append("**IMPORTANT:** This is a CLI tool/library. Users running this code have local access.")
         lines.append("Only flag vulnerabilities that could be exploited by a REMOTE attacker, not by local users.")
         lines.append("")
@@ -141,7 +141,7 @@ def get_analysis_prompt(
 >>> END OF TARGET FUNCTION <<<"""
 
     # Build the appropriate questions based on whether we have app context
-    if app_context and not app_context.requires_remote_trigger:
+    if app_context and app_context.suppress_local_only():
         input_question = """2. **Where does input come from and who controls it?**
    - Remote attackers (HTTP requests, external APIs)? → potential concern
    - Local users running the CLI/library? → NOT an attack vector (they have local access)
@@ -213,7 +213,7 @@ def get_system_prompt(app_context: "ApplicationContext" = None) -> str:
     """
     base_prompt = STAGE1_SYSTEM_PROMPT
 
-    if app_context and not app_context.requires_remote_trigger:
+    if app_context and app_context.suppress_local_only():
         base_prompt += """
 
 IMPORTANT: This is a CLI tool or library. The user running this code has local filesystem access.

@@ -0,0 +1,83 @@
+"""Fix A — threat-model gate: don't suppress untrusted-input bugs for data libraries.
+
+`format_app_context_for_prompt` (Stage 1) and `format_app_context_for_verification`
+(Stage 2) historically emitted a "this is a CLI tool/library, only flag REMOTE
+attackers, not local users" suppression block whenever `requires_remote_trigger`
+was False — which the generator sets for EVERY library. That discards exactly the
+bug class of a parser/deserializer/codec, whose untrusted INPUT DATA is the attack
+surface even with no network listener (the tree-sitter case).
+
+Fix: gate the suppression on `ApplicationContext.suppress_local_only()`, which keeps
+the clause only when NO trust boundary is `untrusted`. A library that ingests
+untrusted data (`source_code_being_parsed: untrusted`) is therefore analysed, while
+a genuine no-attack-surface library (all-trusted) is unchanged — no new field, reuses
+the already-captured `trust_boundaries`.
+"""
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[1]))  # libs/openant-core
+
+from context.application_context import ApplicationContext  # noqa: E402
+from prompts.vulnerability_analysis import format_app_context_for_prompt  # noqa: E402
+from prompts.verification_prompts import format_app_context_for_verification  # noqa: E402
+
+# Sentinel substrings expected ONLY inside each formatter's suppression block (uniqueness assumed; re-verify if prompts change).
+_STAGE1_SUPPRESSION = "have local access"        # vulnerability_analysis.py
+_STAGE2_SUPPRESSION = "local filesystem access"  # verification_prompts.py
+
+
+def _lib(trust_boundaries, *, remote=False):
+    """Build a minimal ``library`` context; only the two gate inputs vary."""
+    fields = {
+        "application_type": "library", "purpose": "x",
+        "trust_boundaries": trust_boundaries, "requires_remote_trigger": remote,
+    }
+    return ApplicationContext(**fields)
+
+
+# --- the method itself --------------------------------------------------------
+def test_suppress_when_all_trusted():
+    assert _lib(dict.fromkeys(("cli_args", "config"), "trusted")).suppress_local_only() is True
+
+
+def test_do_not_suppress_when_any_untrusted():
+    assert _lib(trust_boundaries={"source_code_being_parsed": "untrusted", "config": "trusted"}).suppress_local_only() is False
+
+
+def test_untrusted_match_is_case_insensitive():
+    # LLM-generated trust levels may arrive in any letter case; each must still match.
+    for spelling in ("Untrusted", "UNTRUSTED"):
+        assert _lib({"input": spelling}).suppress_local_only() is False
+
+
+def test_do_not_suppress_when_remote():
+    # Remote-triggered (web_app-style) contexts never get the local-only suppression.
+    assert _lib(trust_boundaries={"cli_args": "trusted"}, remote=True).suppress_local_only() is False
+
+
+def test_empty_boundaries_still_suppress():
+    # Local-only library with no declared boundaries: conservative suppression stays on.
+    assert _lib(trust_boundaries={}).suppress_local_only() is True
+
+
+# --- Stage 1 formatter --------------------------------------------------------
+def test_stage1_all_trusted_keeps_suppression():
+    ctx = _lib({"cli_args": "trusted"})
+    assert _STAGE1_SUPPRESSION in format_app_context_for_prompt(ctx)
+
+
+def test_stage1_untrusted_input_drops_suppression():
+    ctx = _lib({"source_code_being_parsed": "untrusted"})
+    assert _STAGE1_SUPPRESSION not in format_app_context_for_prompt(ctx)
+
+
+# --- Stage 2 formatter --------------------------------------------------------
+def test_stage2_all_trusted_keeps_suppression():
+    ctx = _lib({"cli_args": "trusted"})
+    assert _STAGE2_SUPPRESSION in format_app_context_for_verification(ctx)
+
+
+def test_stage2_untrusted_input_drops_suppression():
+    ctx = _lib({"source_code_being_parsed": "untrusted"})
+    assert _STAGE2_SUPPRESSION not in format_app_context_for_verification(ctx)