Add support of encoded image url

This commit is contained in:
Aiden Dai
2024-04-09 20:43:29 +08:00
parent 1888fa1c98
commit 81716238ec
3 changed files with 80 additions and 6 deletions

View File

@@ -94,7 +94,6 @@ print(doc_result[0][:5])
**Important Notice**: Please carefully review the following points before using this proxy API for Multimodal.
1. This API is only supported by Claude 3 models.
2. Ensure the image URL is publicly accessible, so that it can be fetched from within Lambda/Fargate.
**Example Request**
@@ -124,6 +123,35 @@ curl $OPENAI_BASE_URL/chat/completions \
}'
```
If you need to use this API with non-public images, you can base64-encode the image first and pass the encoded string as a data URL.
Replace `image/jpeg` with the actual content type. Currently, only `image/jpeg`, `image/png`, `image/gif` and `image/webp` are supported.
```bash
curl $OPENAI_BASE_URL/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $OPENAI_API_KEY" \
-d '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "please identify and count all the objects in this images, list all the names"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,<your image data>"
}
}
]
}
]
}'
```
**Example Response**
```json

View File

@@ -93,7 +93,6 @@ print(doc_result[0][:5])
**重要**:在使用此代理API进行多模态处理之前,请仔细阅读以下几点:
1. 此API 仅支持Claude 3模型。
2. 您应确保 Lambda/Fargate 可以公开访问该图片URL。
**Request 示例**
@@ -123,6 +122,35 @@ curl $OPENAI_BASE_URL/chat/completions \
}'
```
如果您需要使用此API处理非公开图像,您可以先对图像进行base64编码,然后传递编码后的字符串。
将"image/jpeg"替换为实际的内容类型(content type)。目前仅支持"image/jpeg"、"image/png"、"image/gif"或"image/webp"。
```bash
curl $OPENAI_BASE_URL/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $OPENAI_API_KEY" \
-d '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "please identify and count all the objects in this images, list all the names"
},
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,<your image data>"
}
}
]
}
]
}'
```
**Response 示例**
```json

View File

@@ -1,6 +1,7 @@
import base64
import json
import logging
import re
from abc import ABC
from typing import AsyncIterable, Iterable
@@ -175,16 +176,32 @@ class ClaudeModel(BedrockModel):
)
]
def _get_base64_image(self, image_url: str) -> str:
def _get_base64_image(self, image_url: str) -> tuple[str, str]:
"""Try to get the base64 data from an image url.
returns a tuple of (Image Data, Content Type)
"""
pattern = r"^data:(image/[a-z]*);base64,\s*"
content_type = re.search(pattern, image_url)
# if already base64 encoded.
# Only supports 'image/jpeg', 'image/png', 'image/gif' or 'image/webp'
if content_type:
image_data = re.sub(pattern, "", image_url)
return image_data, content_type.group(1)
# Send a request to the image URL
response = requests.get(image_url)
content_type = response.headers.get('Content-Type')
if not content_type.startswith("image"):
content_type = "image/jpeg"
# Check if the request was successful
if response.status_code == 200:
# Get the image content
image_content = response.content
# Encode the image content as base64
base64_image = base64.b64encode(image_content)
return base64_image.decode("utf-8")
return base64_image.decode("utf-8"), content_type
else:
raise HTTPException(
status_code=500, detail="Unable to access the image url"
@@ -199,13 +216,14 @@ class ClaudeModel(BedrockModel):
if isinstance(part, TextContent):
content_parts.append(part.model_dump())
else:
image_data, content_type = self._get_base64_image(part.image_url.url)
content_parts.append(
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/jpeg",
"data": self._get_base64_image(part.image_url.url),
"media_type": content_type,
"data": image_data,
},
}
)