
Commit 9d05839

Fix pegasus response and add doc
1 parent 2471602 commit 9d05839

File tree

4 files changed: +301 −3 lines changed


docs/my-website/docs/providers/bedrock.md

Lines changed: 127 additions & 0 deletions
@@ -1683,6 +1683,131 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
</TabItem>
</Tabs>

## TwelveLabs Pegasus - Video Understanding

TwelveLabs Pegasus 1.2 is a video understanding model that can analyze and describe video content. LiteLLM supports this model through Bedrock's `/invoke` endpoint.

| Property | Details |
|----------|---------|
| Provider Route | `bedrock/us.twelvelabs.pegasus-1-2-v1:0`, `bedrock/eu.twelvelabs.pegasus-1-2-v1:0` |
| Provider Documentation | [TwelveLabs Pegasus Docs ↗](https://docs.twelvelabs.io/docs/models/pegasus) |
| Supported Parameters | `max_tokens`, `temperature`, `response_format` |
| Media Input | S3 URI or base64-encoded video |

### Supported Features

- **Video Analysis**: Analyze video content from S3 or base64 input
- **Structured Output**: Support for JSON schema response format
- **S3 Integration**: Support for S3 video URLs with bucket owner specification

### Usage with S3 Video

<Tabs>
<TabItem value="sdk" label="SDK">

```python title="TwelveLabs Pegasus SDK Usage" showLineNumbers
from litellm import completion
import os

# Set AWS credentials
os.environ["AWS_ACCESS_KEY_ID"] = "your-aws-access-key"
os.environ["AWS_SECRET_ACCESS_KEY"] = "your-aws-secret-key"
os.environ["AWS_REGION_NAME"] = "us-east-1"

response = completion(
    model="bedrock/us.twelvelabs.pegasus-1-2-v1:0",
    messages=[{"role": "user", "content": "Describe what happens in this video."}],
    mediaSource={
        "s3Location": {
            "uri": "s3://your-bucket/video.mp4",
            "bucketOwner": "123456789012",  # 12-digit AWS account ID
        }
    },
    temperature=0.2,
)

print(response.choices[0].message.content)
```
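
`mediaSource` is a TwelveLabs-specific parameter: LiteLLM forwards it as-is into the Bedrock `/invoke` request body alongside the prompt.
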
</TabItem>

<TabItem value="proxy" label="Proxy">

**1. Add to config**

```yaml title="config.yaml" showLineNumbers
model_list:
  - model_name: pegasus-video
    litellm_params:
      model: bedrock/us.twelvelabs.pegasus-1-2-v1:0
      aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
      aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
      aws_region_name: os.environ/AWS_REGION_NAME
```

**2. Start proxy**

```bash title="Start LiteLLM Proxy" showLineNumbers
litellm --config /path/to/config.yaml

# RUNNING at http://0.0.0.0:4000
```

**3. Test it!**

```bash title="Test Pegasus via Proxy" showLineNumbers
curl --location 'http://0.0.0.0:4000/chat/completions' \
  --header 'Authorization: Bearer sk-1234' \
  --header 'Content-Type: application/json' \
  --data '{
    "model": "pegasus-video",
    "messages": [
      {
        "role": "user",
        "content": "Describe what happens in this video."
      }
    ],
    "mediaSource": {
      "s3Location": {
        "uri": "s3://your-bucket/video.mp4",
        "bucketOwner": "123456789012"
      }
    },
    "temperature": 0.2
  }'
```

</TabItem>
</Tabs>

### Usage with Base64 Video

You can also pass video content directly as base64:

```python title="Base64 Video Input" showLineNumbers
from litellm import completion
import base64

# Read video file and encode to base64
with open("video.mp4", "rb") as video_file:
    video_base64 = base64.b64encode(video_file.read()).decode("utf-8")

response = completion(
    model="bedrock/us.twelvelabs.pegasus-1-2-v1:0",
    messages=[{"role": "user", "content": "What is happening in this video?"}],
    mediaSource={
        "base64String": video_base64
    },
    temperature=0.2,
)

print(response.choices[0].message.content)
```
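
Base64 encoding inflates the payload by roughly a third, so S3 input is generally the better choice for larger videos.
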
### Important Notes

- **Response Format**: The model supports structured output via `response_format` with a JSON schema, as shown in the sketch below.

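A minimal sketch of structured output, assuming the OpenAI-style `response_format` shape LiteLLM accepts elsewhere; the schema name and fields below are illustrative:

```python title="Structured Output with JSON Schema" showLineNumbers
from litellm import completion

response = completion(
    model="bedrock/us.twelvelabs.pegasus-1-2-v1:0",
    messages=[{"role": "user", "content": "Summarize this video as JSON."}],
    mediaSource={
        "s3Location": {
            "uri": "s3://your-bucket/video.mp4",
            "bucketOwner": "123456789012",
        }
    },
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "video_summary",  # illustrative schema name
            "schema": {
                "type": "object",
                "properties": {
                    "summary": {"type": "string"},
                    "key_events": {"type": "array", "items": {"type": "string"}},
                },
            },
        },
    },
)

print(response.choices[0].message.content)
```

Under the hood, this commit's `_normalize_response_format` unwraps the nested `schema` into the `{"jsonSchema": {...}}` shape TwelveLabs expects.
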
## Provisioned throughput models
To use provisioned throughput Bedrock models pass
- `model=bedrock/<base-model>`, example `model=bedrock/anthropic.claude-v2`. Set `model` to any of the [Supported AWS models](#supported-aws-bedrock-models)
@@ -1743,6 +1868,8 @@ Here's an example of using a bedrock model with LiteLLM. For a complete list, re
| Meta Llama 2 Chat 70b | `completion(model='bedrock/meta.llama2-70b-chat-v1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| Mistral 7B Instruct | `completion(model='bedrock/mistral.mistral-7b-instruct-v0:2', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| Mixtral 8x7B Instruct | `completion(model='bedrock/mistral.mixtral-8x7b-instruct-v0:1', messages=messages)` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| TwelveLabs Pegasus 1.2 (US) | `completion(model='bedrock/us.twelvelabs.pegasus-1-2-v1:0', messages=messages, mediaSource={...})` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |
| TwelveLabs Pegasus 1.2 (EU) | `completion(model='bedrock/eu.twelvelabs.pegasus-1-2-v1:0', messages=messages, mediaSource={...})` | `os.environ['AWS_ACCESS_KEY_ID']`, `os.environ['AWS_SECRET_ACCESS_KEY']`, `os.environ['AWS_REGION_NAME']` |

## Bedrock Embedding

litellm/llms/bedrock/chat/invoke_transformations/amazon_twelvelabs_pegasus_transformation.py

Lines changed: 149 additions & 2 deletions
@@ -5,16 +5,32 @@
 https://docs.twelvelabs.io/docs/models/pegasus
 """

-from typing import Any, Dict, List, Optional
+import json
+import time
+from typing import TYPE_CHECKING, Any, Dict, List, Optional

+import httpx
+
+import litellm
+from litellm._logging import verbose_logger
+from litellm.litellm_core_utils.core_helpers import map_finish_reason
 from litellm.llms.base_llm.base_utils import type_to_response_format_param
 from litellm.llms.base_llm.chat.transformation import BaseConfig
 from litellm.llms.bedrock.chat.invoke_transformations.base_invoke_transformation import (
     AmazonInvokeConfig,
 )
+from litellm.llms.bedrock.common_utils import BedrockError
 from litellm.types.llms.openai import AllMessageValues
+from litellm.types.utils import ModelResponse, Usage
 from litellm.utils import get_base64_str

+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+else:
+    LiteLLMLoggingObj = Any


 class AmazonTwelveLabsPegasusConfig(AmazonInvokeConfig, BaseConfig):
     """
@@ -53,7 +69,35 @@ def map_openai_params(
         return optional_params

     def _normalize_response_format(self, value: Any) -> Any:
+        """Normalize response_format to the TwelveLabs format.
+
+        TwelveLabs expects:
+            {
+                "jsonSchema": {...}
+            }
+
+        But the OpenAI format is:
+            {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "...",
+                    "schema": {...}
+                }
+            }
+        """
         if isinstance(value, dict):
+            # If it has a json_schema field, extract and transform it
+            if "json_schema" in value:
+                json_schema = value["json_schema"]
+                # Extract the schema if nested
+                if isinstance(json_schema, dict) and "schema" in json_schema:
+                    return {"jsonSchema": json_schema["schema"]}
+                # Otherwise use json_schema directly
+                return {"jsonSchema": json_schema}
+            # If it already has jsonSchema, return as is
+            if "jsonSchema" in value:
+                return value
+            # Otherwise return the dict as is
             return value
         return type_to_response_format_param(response_format=value) or value
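
A quick sketch of the mapping this helper performs (the schema content is illustrative):

```python
# OpenAI-style input (hypothetical schema)
openai_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "video_schema",
        "schema": {"type": "object", "properties": {"summary": {"type": "string"}}},
    },
}

# _normalize_response_format unwraps the nested schema and drops the name:
# {"jsonSchema": {"type": "object", "properties": {"summary": {"type": "string"}}}}
```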

@@ -72,9 +116,18 @@ def transform_request(
         if media_source is not None:
             request_data["mediaSource"] = media_source

-        for key in ("temperature", "maxOutputTokens", "responseFormat"):
+        # Handle temperature and maxOutputTokens
+        for key in ("temperature", "maxOutputTokens"):
             if key in optional_params:
                 request_data[key] = optional_params.get(key)
+
+        # Handle responseFormat - transform to TwelveLabs format
+        if "responseFormat" in optional_params:
+            response_format = optional_params["responseFormat"]
+            transformed_format = self._normalize_response_format(response_format)
+            if transformed_format:
+                request_data["responseFormat"] = transformed_format
+
         return request_data

     def _build_media_source(self, optional_params: dict) -> Optional[dict]:
@@ -131,3 +184,97 @@ def _convert_messages_to_prompt(self, messages: List[AllMessageValues]) -> str:
             prompt_parts.append(f"{role}: {content}")
         return "\n".join(part for part in prompt_parts if part).strip()

+    def transform_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ModelResponse,
+        logging_obj: LiteLLMLoggingObj,
+        request_data: dict,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        """
+        Transform TwelveLabs Pegasus response to LiteLLM format.
+
+        TwelveLabs response format:
+            {
+                "message": "...",
+                "finishReason": "stop" | "length"
+            }
+
+        LiteLLM format:
+            ModelResponse with choices[0].message.content and finish_reason
+        """
+        try:
+            completion_response = raw_response.json()
+        except Exception as e:
+            raise BedrockError(
+                message=f"Error parsing response: {raw_response.text}, error: {str(e)}",
+                status_code=raw_response.status_code,
+            )
+
+        verbose_logger.debug(
+            "twelvelabs pegasus response: %s",
+            json.dumps(completion_response, indent=4, default=str),
+        )
+
+        # Extract message content
+        message_content = completion_response.get("message", "")
+
+        # Extract finish reason and map to LiteLLM format
+        finish_reason_raw = completion_response.get("finishReason", "stop")
+        finish_reason = map_finish_reason(finish_reason_raw)
+
+        # Set the response content
+        try:
+            if (
+                message_content
+                and hasattr(model_response.choices[0], "message")
+                and getattr(model_response.choices[0].message, "tool_calls", None) is None
+            ):
+                model_response.choices[0].message.content = message_content  # type: ignore
+                model_response.choices[0].finish_reason = finish_reason
+            else:
+                raise Exception("Unable to set message content")
+        except Exception as e:
+            raise BedrockError(
+                message=f"Error setting response content: {str(e)}. Response: {completion_response}",
+                status_code=raw_response.status_code,
+            )
+
+        # Calculate usage from headers
+        bedrock_input_tokens = raw_response.headers.get(
+            "x-amzn-bedrock-input-token-count", None
+        )
+        bedrock_output_tokens = raw_response.headers.get(
+            "x-amzn-bedrock-output-token-count", None
+        )
+
+        prompt_tokens = int(
+            bedrock_input_tokens or litellm.token_counter(messages=messages)
+        )
+
+        completion_tokens = int(
+            bedrock_output_tokens
+            or litellm.token_counter(
+                text=model_response.choices[0].message.content,  # type: ignore
+                count_response_tokens=True,
+            )
+        )
+
+        model_response.created = int(time.time())
+        model_response.model = model
+        usage = Usage(
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+            total_tokens=prompt_tokens + completion_tokens,
+        )
+        setattr(model_response, "usage", usage)
+
+        return model_response
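
A quick sketch of what the new `transform_response` produces from a typical invoke response (the header names come from the code above; the values are illustrative):

```python
# Illustrative raw Bedrock response for Pegasus
raw_body = {"message": "A chef plates a pasta dish.", "finishReason": "stop"}
raw_headers = {
    "x-amzn-bedrock-input-token-count": "4521",
    "x-amzn-bedrock-output-token-count": "12",
}

# After transform_response:
#   choices[0].message.content == "A chef plates a pasta dish."
#   choices[0].finish_reason   == "stop"
#   usage: prompt_tokens=4521, completion_tokens=12, total_tokens=4533
```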

litellm/llms/bedrock/chat/invoke_transformations/base_invoke_transformation.py

Lines changed: 22 additions & 0 deletions
@@ -250,6 +250,14 @@ def transform_request(
             ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
                 inference_params[k] = v
             request_data = {"prompt": prompt, **inference_params}
+        elif provider == "twelvelabs":
+            return litellm.AmazonTwelveLabsPegasusConfig().transform_request(
+                model=model,
+                messages=messages,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                headers=headers,
+            )
         else:
             raise BedrockError(
                 status_code=404,
@@ -321,6 +329,20 @@ def transform_response( # noqa: PLR0915
                 litellm_params=litellm_params,
                 encoding=encoding,
             )
+        elif provider == "twelvelabs":
+            return litellm.AmazonTwelveLabsPegasusConfig().transform_response(
+                model=model,
+                raw_response=raw_response,
+                model_response=model_response,
+                logging_obj=logging_obj,
+                request_data=request_data,
+                messages=messages,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                encoding=encoding,
+                api_key=api_key,
+                json_mode=json_mode,
+            )
         elif provider == "ai21":
             outputText = (
                 completion_response.get("completions")[0].get("data").get("text")

tests/test_litellm/llms/bedrock/chat/invoke_transformations/test_twelvelabs_pegasus_transformation.py

Lines changed: 3 additions & 1 deletion
@@ -38,7 +38,9 @@ def test_map_openai_params_translates_fields():
     assert optional_params["maxOutputTokens"] == 20
     assert optional_params["temperature"] == 0.6
     assert "responseFormat" in optional_params
-    assert optional_params["responseFormat"]["json_schema"]["name"] == "video_schema"
+    # TwelveLabs format: responseFormat contains jsonSchema directly (not json_schema)
+    assert "jsonSchema" in optional_params["responseFormat"]
+    assert optional_params["responseFormat"]["jsonSchema"]["type"] == "object"


 def test_transform_request_includes_base64_media():
