Added support for twelvelabs pegasus

Sameerlite · Sameerlite · commit 247160277e11 · 2025-11-27T15:46:04.000+05:30
diff --git a/litellm/__init__.py b/litellm/__init__.py
@@ -1222,6 +1222,9 @@ def add_known_models():
 from .llms.bedrock.chat.invoke_transformations.amazon_titan_transformation import (
     AmazonTitanConfig,
 )
+from .llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation import (
+    AmazonTwelveLabsPegasusConfig,
+)
 from .llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
     AmazonInvokeConfig,
 )
diff --git a/litellm/constants.py b/litellm/constants.py
@@ -851,6 +851,7 @@
     "nova",
     "deepseek_r1",
     "qwen3",
+    "twelvelabs",
 ]
 
 BEDROCK_EMBEDDING_PROVIDERS_LITERAL = Literal[
diff --git a/litellm/llms/bedrock/chat/invoke_transformations/amazon_twelvelabs_pegasus_transformation.py b/litellm/llms/bedrock/chat/invoke_transformations/amazon_twelvelabs_pegasus_transformation.py
@@ -0,0 +1,133 @@
+"""
+Transforms OpenAI-style requests into TwelveLabs Pegasus 1.2 requests for Bedrock.
+
+Reference:
+https://docs.twelvelabs.io/docs/models/pegasus
+"""
+
+from typing import Any, Dict, List, Optional
+
+from litellm.llms.base_llm.base_utils import type_to_response_format_param
+from litellm.llms.base_llm.chat.transformation import BaseConfig
+from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
+    AmazonInvokeConfig,
+)
+from litellm.types.llms.openai import AllMessageValues
+from litellm.utils import get_base64_str
+
+
+class AmazonTwelveLabsPegasusConfig(AmazonInvokeConfig, BaseConfig):
+    """
+    Handles transforming OpenAI-style requests into Bedrock InvokeModel requests for
+    `twelvelabs.pegasus-1-2-v1:0`.
+
+    Pegasus 1.2 requires an `inputPrompt` and a `mediaSource` that either references
+    an S3 object or a base64-encoded clip. Optional OpenAI params (temperature,
+    response_format, max_tokens) are translated to the TwelveLabs schema.
+    """
+
+    def get_supported_openai_params(self, model: str) -> List[str]:
+        """Return the OpenAI params that `map_openai_params` translates."""
+        return ["max_tokens", "max_completion_tokens", "temperature", "response_format"]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        """
+        Copy supported OpenAI params into `optional_params` (mutated in place and
+        returned) under their Pegasus names; other params are ignored.
+        """
+        for param, value in non_default_params.items():
+            if param in {"max_tokens", "max_completion_tokens"}:
+                optional_params["maxOutputTokens"] = value
+            elif param == "temperature":
+                optional_params["temperature"] = value
+            elif param == "response_format":
+                optional_params["responseFormat"] = self._normalize_response_format(
+                    value
+                )
+        return optional_params
+
+    def _normalize_response_format(self, value: Any) -> Any:
+        """Return dicts unchanged; otherwise convert via the helper when it can."""
+        if isinstance(value, dict):
+            return value
+        return type_to_response_format_param(response_format=value) or value
+
+    def transform_request(
+        self,
+        model: str,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        headers: dict,
+    ) -> dict:
+        """
+        Build the InvokeModel body: `inputPrompt` from the messages, `mediaSource`
+        when one is configured, plus any already-mapped generation params.
+        """
+        input_prompt = self._convert_messages_to_prompt(messages=messages)
+        request_data: Dict[str, Any] = {"inputPrompt": input_prompt}
+        media_source = self._build_media_source(optional_params)
+        if media_source is not None:
+            request_data["mediaSource"] = media_source
+        for key in ("temperature", "maxOutputTokens", "responseFormat"):
+            if key in optional_params:
+                request_data[key] = optional_params[key]
+        return request_data
+
+    def _build_media_source(self, optional_params: dict) -> Optional[dict]:
+        """
+        Resolve `mediaSource` from `optional_params`; None when nothing is provided.
+
+        Precedence: an explicit `mediaSource`/`media_source` dict, then a base64
+        clip, then an S3 URI with an optional bucket owner.
+        """
+        get = optional_params.get
+        direct_source = get("mediaSource") or get("media_source")
+        if isinstance(direct_source, dict):
+            return direct_source
+        base64_input = get("video_base64") or get("base64_string")
+        if base64_input:
+            return {"base64String": get_base64_str(base64_input)}
+        s3_uri = get("video_s3_uri") or get("s3_uri") or get("media_source_s3_uri")
+        if not s3_uri:
+            return None
+        s3_location = {"uri": s3_uri}
+        bucket_owner = (
+            get("video_s3_bucket_owner")
+            or get("s3_bucket_owner")
+            or get("media_source_bucket_owner")
+        )
+        if bucket_owner:
+            s3_location["bucketOwner"] = bucket_owner
+        return {"s3Location": s3_location}
+
+    def _convert_messages_to_prompt(self, messages: List[AllMessageValues]) -> str:
+        """
+        Flatten messages into newline-separated `role: content` lines. Non-text
+        parts become `<image>`/`<video>`/`<audio>`; missing content becomes empty.
+        """
+        prompt_parts: List[str] = []
+        for message in messages:
+            # A None `content` must not be rendered as the literal string "None".
+            content = message.get("content") or ""
+            if isinstance(content, list):
+                fragments: List[str] = []
+                for item in content:
+                    if isinstance(item, str):
+                        fragments.append(item)
+                    elif isinstance(item, dict):
+                        kind = item.get("type")
+                        if kind == "text":
+                            fragments.append(item.get("text") or "")
+                        elif kind in ("image_url", "video_url", "audio_url"):
+                            fragments.append(f"<{kind[:-4]}>")  # e.g. "<image>"
+                content = " ".join(fragments)
+            prompt_parts.append(f"{message.get('role', 'user')}: {content}")
+        return "\n".join(prompt_parts).strip()
+
diff --git a/litellm/llms/bedrock/common_utils.py b/litellm/llms/bedrock/common_utils.py
@@ -616,6 +616,8 @@ def get_bedrock_chat_config(model: str):
         return litellm.AmazonInvokeNovaConfig()
     elif bedrock_invoke_provider == "qwen3":
         return litellm.AmazonQwen3Config()
+    elif bedrock_invoke_provider == "twelvelabs":
+        return litellm.AmazonTwelveLabsPegasusConfig()
     else:
         return litellm.AmazonInvokeConfig()
 
diff --git a/tests/test_litellm/llms/bedrock/chat/invoke_transformations/test_twelvelabs_pegasus_transformation.py b/tests/test_litellm/llms/bedrock/chat/invoke_transformations/test_twelvelabs_pegasus_transformation.py
@@ -0,0 +1,85 @@
+from litellm.llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation import (
+    AmazonTwelveLabsPegasusConfig,
+)
+
+
+def _make_messages() -> list[dict]:
+    """Return a minimal system + user conversation used by the request tests."""
+    system_turn = {"role": "system", "content": "You are an assistant"}
+    user_turn = {"role": "user", "content": "Summarize the attached video."}
+    return [system_turn, user_turn]
+
+
+def test_supported_openai_params():
+    """Every OpenAI param the config translates should be reported as supported."""
+    config = AmazonTwelveLabsPegasusConfig()
+    supported = config.get_supported_openai_params("twelvelabs.pegasus-1-2-v1:0")
+    wanted = {"max_tokens", "max_completion_tokens", "temperature", "response_format"}
+    assert wanted <= set(supported)
+
+
+def test_map_openai_params_translates_fields():
+    """OpenAI params should land in `optional_params` under Pegasus names."""
+    fmt = {
+        "type": "json_schema",
+        "json_schema": {"name": "video_schema", "schema": {"type": "object"}},
+    }
+    params = {"max_tokens": 20, "temperature": 0.6, "response_format": fmt}
+    mapped: dict = {}
+
+    AmazonTwelveLabsPegasusConfig().map_openai_params(
+        non_default_params=params,
+        optional_params=mapped,
+        model="twelvelabs.pegasus-1-2-v1:0",
+        drop_params=False,
+    )
+
+    # `mapped` is filled in place, so the return value is not needed here.
+    assert mapped["maxOutputTokens"] == 20
+    assert mapped["temperature"] == 0.6
+    assert "responseFormat" in mapped
+    assert mapped["responseFormat"]["json_schema"]["name"] == "video_schema"
+
+
+def test_transform_request_includes_base64_media():
+    """A data-URL clip should be sent as a bare `base64String` media source."""
+    config = AmazonTwelveLabsPegasusConfig()
+    params = config.map_openai_params(
+        non_default_params={"max_tokens": 10},
+        optional_params={},
+        model="twelvelabs.pegasus-1-2-v1:0",
+        drop_params=False,
+    )
+    params["video_base64"] = "data:video/mp4;base64,AAA"
+    body = config.transform_request(
+        model="twelvelabs.pegasus-1-2-v1:0",
+        messages=_make_messages(),
+        optional_params=params,
+        litellm_params={},
+        headers={},
+    )
+
+    assert body["inputPrompt"].startswith("system:")
+    assert body["mediaSource"]["base64String"] == "AAA"
+    assert body["maxOutputTokens"] == 10
+
+
+def test_transform_request_includes_s3_media():
+    """S3 URI and bucket owner should be nested under `mediaSource.s3Location`."""
+    uri = "s3://test-bucket/video.mp4"
+    owner = "123456789012"
+    config = AmazonTwelveLabsPegasusConfig()
+
+    body = config.transform_request(
+        model="twelvelabs.pegasus-1-2-v1:0",
+        messages=_make_messages(),
+        optional_params={"video_s3_uri": uri, "video_s3_bucket_owner": owner},
+        litellm_params={},
+        headers={},
+    )
+
+    # No OpenAI param mapping is involved; the S3 keys are read directly.
+    location = body["mediaSource"]["s3Location"]
+    assert location["uri"] == uri
+    assert location["bucketOwner"] == owner
+

Original file line number	Diff line number	Diff line change
`@@ -1222,6 +1222,9 @@ def add_known_models():`
`1222`	`1222`	`from .llms.bedrock.chat.invoke_transformations.amazon_titan_transformation import (`
`1223`	`1223`	`AmazonTitanConfig,`
`1224`	`1224`	`)`
	`1225`	`+from .llms.bedrock.chat.invoke_transformations.amazon_twelvelabs_pegasus_transformation import (`
	`1226`	`+ AmazonTwelveLabsPegasusConfig,`
	`1227`	`+)`
`1225`	`1228`	`from .llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (`
`1226`	`1229`	`AmazonInvokeConfig,`
`1227`	`1230`	`)`
Original file line number	Diff line number	Diff line change
`@@ -851,6 +851,7 @@`
`851`	`851`	`"nova",`
`852`	`852`	`"deepseek_r1",`
`853`	`853`	`"qwen3",`
	`854`	`+ "twelvelabs",`
`854`	`855`	`]`
`855`	`856`
`856`	`857`	`BEDROCK_EMBEDDING_PROVIDERS_LITERAL = Literal[`