-
Notifications
You must be signed in to change notification settings - Fork 10
Anthropic and Google Vertex: Add Claude and Gemini to llm_call endpoint #896
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bf9ae80
b279ff7
8f5e6b5
8cc64b1
115bea2
7fc2ad1
a7eff4b
6a0fdb8
5255ffa
7a7b7ed
1fb5a53
9ff5c72
3bd2206
8afea28
b6605c4
534c5eb
3c387e7
9b3494f
3baa4b2
ab82e49
c93e77d
bf1565a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,4 +4,5 @@ | |
| CloudStorage, | ||
| CloudStorageError, | ||
| get_cloud_storage, | ||
| upload_audio_to_gcs, | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,8 @@ | ||
| import os | ||
| import mimetypes | ||
| import filetype | ||
| from sqlmodel import Session | ||
| from uuid import UUID | ||
| from uuid import UUID, uuid4 | ||
| import logging | ||
| import functools as ft | ||
| from pathlib import Path | ||
|
|
@@ -12,10 +14,19 @@ | |
| from fastapi import UploadFile | ||
| from botocore.exceptions import ClientError | ||
| from botocore.response import StreamingBody | ||
| from google.cloud import storage as gcs | ||
| from google.oauth2 import service_account | ||
|
|
||
| from app.crud import get_project_by_id | ||
| from app.core.config import settings | ||
| from app.utils import mask_string | ||
|
|
||
|
|
||
| def _mask(value: str | None) -> str: | ||
| # Lazy to break a top-level cycle: app.utils transitively imports | ||
| # app.services.llm.providers, which imports this module. | ||
| from app.utils import mask_string | ||
|
|
||
| return mask_string(value) | ||
|
|
||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
@@ -46,7 +57,7 @@ def create(self): | |
| except ValueError as err: | ||
| logger.error( | ||
| f"[AmazonCloudStorageClient.create] Invalid bucket configuration | " | ||
| f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}'}}", | ||
| f"{{'bucket': '{_mask(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}'}}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(err) from err | ||
|
|
@@ -55,13 +66,13 @@ def create(self): | |
| if response != 404: | ||
| logger.error( | ||
| f"[AmazonCloudStorageClient.create] Unexpected AWS error | " | ||
| f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}', 'code': {response}}}", | ||
| f"{{'bucket': '{_mask(settings.AWS_S3_BUCKET)}', 'error': '{str(err)}', 'code': {response}}}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(err) from err | ||
| logger.warning( | ||
| f"[AmazonCloudStorageClient.create] Bucket not found, creating | " | ||
| f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}'}}" | ||
| f"{{'bucket': '{_mask(settings.AWS_S3_BUCKET)}'}}" | ||
| ) | ||
| try: | ||
| self.client.create_bucket( | ||
|
|
@@ -72,12 +83,12 @@ def create(self): | |
| ) | ||
| logger.info( | ||
| f"[AmazonCloudStorageClient.create] Bucket created successfully | " | ||
| f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}'}}" | ||
| f"{{'bucket': '{_mask(settings.AWS_S3_BUCKET)}'}}" | ||
| ) | ||
| except ClientError as create_err: | ||
| logger.error( | ||
| f"[AmazonCloudStorageClient.create] Failed to create bucket | " | ||
| f"{{'bucket': '{mask_string(settings.AWS_S3_BUCKET)}', 'error': '{str(create_err)}'}}", | ||
| f"{{'bucket': '{_mask(settings.AWS_S3_BUCKET)}', 'error': '{str(create_err)}'}}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(create_err) from create_err | ||
|
|
@@ -168,12 +179,12 @@ def put(self, source: UploadFile, file_path: Path) -> SimpleStorageName: | |
| ) | ||
| logger.info( | ||
| f"[AmazonCloudStorage.put] File uploaded successfully | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}'}}" | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(destination.Bucket)}', 'key': '{_mask(destination.Key)}'}}" | ||
| ) | ||
| except ClientError as err: | ||
| logger.error( | ||
| f"[AmazonCloudStorage.put] AWS upload error | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(destination.Bucket)}', 'key': '{mask_string(destination.Key)}', 'error': '{str(err)}'}}", | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(destination.Bucket)}', 'key': '{_mask(destination.Key)}', 'error': '{str(err)}'}}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(f'AWS Error: "{err}"') from err | ||
|
|
@@ -187,13 +198,13 @@ def stream(self, url: str) -> StreamingBody: | |
| body = self.aws.client.get_object(**kwargs).get("Body") | ||
| logger.info( | ||
| f"[AmazonCloudStorage.stream] File streamed successfully | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}'}}" | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}'}}" | ||
| ) | ||
| return body | ||
| except ClientError as err: | ||
| logger.error( | ||
| f"[AmazonCloudStorage.stream] AWS stream error | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}", | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}', 'error': '{str(err)}'}}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err | ||
|
|
@@ -206,13 +217,13 @@ def get(self, url: str) -> bytes: | |
| content = body.read() | ||
| logger.info( | ||
| f"[AmazonCloudStorage.get] File retrieved successfully | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'size_bytes': {len(content)}}}" | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}', 'size_bytes': {len(content)}}}" | ||
| ) | ||
| return content | ||
| except ClientError as err: | ||
| logger.error( | ||
| f"[AmazonCloudStorage.get] AWS get error | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}", | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}', 'error': '{str(err)}'}}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err | ||
|
|
@@ -226,13 +237,13 @@ def get_file_size_kb(self, url: str) -> float: | |
| size_kb = round(size_bytes / 1024, 2) | ||
| logger.info( | ||
| f"[AmazonCloudStorage.get_file_size_kb] File size retrieved successfully | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'size_kb': {size_kb}}}" | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}', 'size_kb': {size_kb}}}" | ||
| ) | ||
| return size_kb | ||
| except ClientError as err: | ||
| logger.error( | ||
| f"[AmazonCloudStorage.get_file_size_kb] AWS head object error | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}", | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}', 'error': '{str(err)}'}}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err | ||
|
|
@@ -259,13 +270,13 @@ def get_signed_url(self, url: str, expires_in: int = 3600) -> str: | |
| ) | ||
| logger.info( | ||
| f"[AmazonCloudStorage.get_signed_url] Signed URL generated | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}'}}" | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}'}}" | ||
| ) | ||
| return signed_url | ||
| except ClientError as err: | ||
| logger.error( | ||
| f"[AmazonCloudStorage.get_signed_url] AWS presign error | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}", | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}', 'error': '{str(err)}'}}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err | ||
|
|
@@ -277,12 +288,12 @@ def delete(self, url: str) -> None: | |
| self.aws.client.delete_object(**kwargs) | ||
| logger.info( | ||
| f"[AmazonCloudStorage.delete] File deleted successfully | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}'}}" | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}'}}" | ||
| ) | ||
| except ClientError as err: | ||
| logger.error( | ||
| f"[AmazonCloudStorage.delete] AWS delete error | " | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{mask_string(name.Bucket)}', 'key': '{mask_string(name.Key)}', 'error': '{str(err)}'}}", | ||
| f"{{'project_id': '{self.project_id}', 'bucket': '{_mask(name.Bucket)}', 'key': '{_mask(name.Key)}', 'error': '{str(err)}'}}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err | ||
|
|
@@ -292,6 +303,11 @@ def get_cloud_storage(session: Session, project_id: int) -> CloudStorage: | |
| """ | ||
| Method to create and configure a cloud storage instance. | ||
| """ | ||
| # Lazy import to avoid a top-level cycle: storage.py is imported from | ||
| # app.services.llm.providers.gai_vertex, which itself is wired into the | ||
| # provider registry that app.crud transitively pulls in. | ||
| from app.crud import get_project_by_id | ||
|
|
||
| project = get_project_by_id(session=session, project_id=project_id) | ||
| if not project: | ||
| raise ValueError(f"Invalid project_id: {project_id}") | ||
|
|
@@ -306,3 +322,103 @@ def get_cloud_storage(session: Session, project_id: int) -> CloudStorage: | |
| exc_info=True, | ||
| ) | ||
| raise | ||
|
|
||
|
|
||
| GCS_SCOPES = ("https://www.googleapis.com/auth/cloud-platform",) | ||
|
|
||
| MAX_AUDIO_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB | ||
|
|
||
| _MIME_TO_EXT = { | ||
| "audio/wav": ".wav", | ||
| "audio/mpeg": ".mp3", | ||
| "audio/mp3": ".mp3", | ||
| "audio/ogg": ".ogg", | ||
| "audio/flac": ".flac", | ||
| "audio/webm": ".webm", | ||
| "audio/aac": ".aac", | ||
| "audio/aiff": ".aiff", | ||
| } | ||
|
|
||
|
|
||
| def upload_audio_to_gcs( | ||
| *, | ||
| bucket_name: str, | ||
| sa_info: dict, | ||
| audio_bytes: bytes | None = None, | ||
| local_path: str | None = None, | ||
| content_type: str | None = None, | ||
| project_id: str | None = None, | ||
| key_prefix: str = "audio", | ||
| ) -> str: | ||
| """Upload audio to GCS and return its ``gs://bucket/key`` URI. | ||
|
|
||
| Pass exactly one of ``audio_bytes`` or ``local_path``. | ||
|
|
||
| BYOK: caller supplies ``sa_info`` and ``bucket_name``. The returned URI | ||
| plugs directly into Vertex ``fileData.fileUri``. | ||
| """ | ||
| if (audio_bytes is None) == (local_path is None): | ||
| raise ValueError("Pass exactly one of audio_bytes or local_path") | ||
|
|
||
| if local_path is not None: | ||
| if not os.path.isfile(local_path): | ||
| raise FileNotFoundError(f"Audio file not found: {local_path}") | ||
| size = os.path.getsize(local_path) | ||
| ext = Path(local_path).suffix or "" | ||
| mime = content_type or mimetypes.guess_type(local_path)[0] or "audio/wav" | ||
| else: | ||
| if not audio_bytes: | ||
| raise ValueError("audio_bytes is empty") | ||
| size = len(audio_bytes) | ||
| mime = content_type or "audio/wav" | ||
| ext = _MIME_TO_EXT.get(mime, "") | ||
|
|
||
| if mime not in _MIME_TO_EXT: | ||
| raise ValueError( | ||
| f"Unsupported content_type '{mime}'. Allowed: " | ||
| f"{', '.join(sorted(_MIME_TO_EXT))}" | ||
| ) | ||
|
|
||
| # Sniff the actual bytes — content_type is caller-supplied and spoofable. | ||
| sniff_source = audio_bytes if audio_bytes is not None else local_path | ||
| detected = filetype.guess(sniff_source) | ||
| if detected is None or not detected.mime.startswith("audio/"): | ||
| raise ValueError( | ||
| f"Uploaded content is not a recognised audio file " | ||
| f"(detected={detected.mime if detected else 'unknown'})" | ||
| ) | ||
|
|
||
| if size > MAX_AUDIO_UPLOAD_BYTES: | ||
| raise ValueError( | ||
| f"Audio exceeds {MAX_AUDIO_UPLOAD_BYTES // (1024 * 1024)} MB limit " | ||
| f"(got {size / (1024 * 1024):.1f} MB)" | ||
| ) | ||
|
|
||
| key = f"{key_prefix}/{uuid4().hex}{ext}" | ||
|
|
||
| try: | ||
| creds = service_account.Credentials.from_service_account_info( | ||
| sa_info, scopes=list(GCS_SCOPES) | ||
| ) | ||
| client = gcs.Client( | ||
| project=project_id or sa_info.get("project_id"), credentials=creds | ||
| ) | ||
| blob = client.bucket(bucket_name).blob(key) | ||
| if local_path is not None: | ||
| blob.upload_from_filename(local_path, content_type=mime) | ||
| else: | ||
| blob.upload_from_string(audio_bytes, content_type=mime) | ||
| except Exception as e: | ||
| logger.error( | ||
| f"[upload_audio_to_gcs] Upload failed | " | ||
| f"bucket={bucket_name}, key={key}, error={e}", | ||
| exc_info=True, | ||
| ) | ||
| raise CloudStorageError(f"GCS upload failed: {e}") from e | ||
|
|
||
| uri = f"gs://{bucket_name}/{key}" | ||
| logger.info( | ||
| f"[upload_audio_to_gcs] Uploaded | " | ||
| f"uri={uri}, mime={mime}, size_kb={size / 1024:.1f}" | ||
|
Comment on lines
+412
to
+422
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mask the staged GCS identifiers in these logs. Lines 387-397 log raw 🤖 Prompt for AI Agents |
||
| ) | ||
| return uri | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add the missing WAV MIME aliases here.
AudioRef.to_path()now relies on this table to pick the temp-file suffix, butaudio/waveandaudio/x-wavare missing even thoughbackend/app/utils.py:get_file_extension()already treats them as WAV. Valid WAV uploads using those MIME types will therefore materialize as*.audio, which can break SDKs that infer the format from the filename extension.Suggested fix
_MIME_TO_EXT = { "audio/wav": ".wav", + "audio/wave": ".wav", + "audio/x-wav": ".wav", "audio/mpeg": ".mp3", "audio/mp3": ".mp3", "audio/ogg": ".ogg",🤖 Prompt for AI Agents