diff --git a/docs/Usage.md b/docs/Usage.md index 18a04e5..bc3a79d 100644 --- a/docs/Usage.md +++ b/docs/Usage.md @@ -94,7 +94,6 @@ print(doc_result[0][:5]) **Important Notice**: Please carefully review the following points before using this proxy API for Multimodal. 1. This API is only supported by Claude 3 model. -2. You should ensure the image url can be publicly accessible in the Lambda/Fargate. **Example Request** @@ -124,6 +123,35 @@ curl $OPENAI_BASE_URL/chat/completions \ }' ``` +If you need to use this API with non-public images, you can base64-encode the image first and pass the encoded string. +Replace `image/jpeg` with the actual content type. Currently, only 'image/jpeg', 'image/png', 'image/gif' and 'image/webp' are supported. + +```bash +curl $OPENAI_BASE_URL/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "please identify and count all the objects in this images, list all the names" + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64," + } + } + ] + } + ] +}' +``` + **Example Response** ```json diff --git a/docs/Usage_CN.md b/docs/Usage_CN.md index 27d1a30..00279d8 100644 --- a/docs/Usage_CN.md +++ b/docs/Usage_CN.md @@ -93,7 +93,6 @@ print(doc_result[0][:5]) **重要**:在使用此代理API进行多模态处理之前,请仔细阅读以下几点: 1. 此API 仅支持Claude 3模型。 -2. 
您应确保 Lambda/Fargate 可以公开访问该图片URL。 **Request 示例** @@ -123,6 +122,35 @@ curl $OPENAI_BASE_URL/chat/completions \ }' ``` +如果您需要使用此API处理非公开图像,您可以先对图像进行base64编码,然后传递编码后的字符串。 +将"image/jpeg"替换为实际的内容类型(content type)。目前仅支持"image/jpeg"、"image/png"、"image/gif"或"image/webp"。 + +```bash +curl $OPENAI_BASE_URL/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": "please identify and count all the objects in this images, list all the names" + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/jpeg;base64," + } + } + ] + } + ] +}' +``` + **Response 示例** ```json diff --git a/src/api/models/bedrock.py b/src/api/models/bedrock.py index ddb0d01..5ada2cf 100644 --- a/src/api/models/bedrock.py +++ b/src/api/models/bedrock.py @@ -1,6 +1,7 @@ import base64 import json import logging +import re from abc import ABC from typing import AsyncIterable, Iterable @@ -175,16 +176,32 @@ class ClaudeModel(BedrockModel): ) ] - def _get_base64_image(self, image_url: str) -> str: + def _get_base64_image(self, image_url: str) -> tuple[str, str]: + """Try to get the base64 data from an image url. + + returns a tuple of (Image Data, Content Type) + """ + pattern = r"^data:(image/[a-z]*);base64,\s*" + content_type = re.search(pattern, image_url) + # if already base64 encoded. 
+ # Only supports 'image/jpeg', 'image/png', 'image/gif' or 'image/webp' + if content_type: + image_data = re.sub(pattern, "", image_url) + return image_data, content_type.group(1) + # Send a request to the image URL response = requests.get(image_url) + content_type = response.headers.get('Content-Type') + if not content_type.startswith("image"): + content_type = "image/jpeg" + # Check if the request was successful if response.status_code == 200: # Get the image content image_content = response.content # Encode the image content as base64 base64_image = base64.b64encode(image_content) - return base64_image.decode("utf-8") + return base64_image.decode("utf-8"), content_type else: raise HTTPException( status_code=500, detail="Unable to access the image url" @@ -199,13 +216,14 @@ class ClaudeModel(BedrockModel): if isinstance(part, TextContent): content_parts.append(part.model_dump()) else: + image_data, content_type = self._get_base64_image(part.image_url.url) content_parts.append( { "type": "image", "source": { "type": "base64", - "media_type": "image/jpeg", - "data": self._get_base64_image(part.image_url.url), + "media_type": content_type, + "data": image_data, }, } )