Initial commit

This commit is contained in:
Aiden Dai
2024-03-27 15:20:24 +08:00
parent f77df2c536
commit f974cb2728
21 changed files with 2149 additions and 5 deletions

19
.flake8 Normal file
View File

@@ -0,0 +1,19 @@
[flake8]
max-line-length = 120
ignore =
E203,W191,W503
exclude =
build
.git
__pycache__
.tox
venv
.venv
.venv-test
tmp*
deployment
cdk.out
node_modules
max-complexity = 10
require-code = True

162
.gitignore vendored Normal file
View File

@@ -0,0 +1,162 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
Config

244
README.md
View File

@@ -1,11 +1,245 @@
## My Project [中文](./README_CN.md)
TODO: Fill this README out! # Bedrock Access Gateway
Be sure to: OpenAI-Compatible RESTful APIs for Amazon Bedrock
* Change the title in this README ## Overview
* Edit your repository description on GitHub
Amazon Bedrock offers a wide range of foundation models (such as Claude 3 Sonnet/Haiku, Llama 2, Mistral/Mixtral etc.)
and a broad set of capabilities for you to build generative AI applications.
Check [Amazon Bedrock](https://aws.amazon.com/bedrock) for more details.
Sometimes, you might have applications developed using OpenAI APIs or SDKs, and you want to experiment with Amazon
Bedrock without modifying your codebase. Or you may simply wish to evaluate the capabilities of these foundation models
in tools like AutoGen etc. Well, this repository allows you to access Amazon Bedrock models seamlessly through OpenAI
APIs and SDKs, enabling you to test these models without code changes.
If you find this GitHub repository useful, please consider giving it a free star to show your appreciation and support
for the project.
Features:
- [x] Support streaming response via server-sent events (SSE)
- [x] Support Model APIs
- [x] Support Chat Completion APIs
- [ ] Support Function Call/Tool Call
- [ ] Support Embedding APIs
- [ ] Support Image APIs
> NOTE: 1. The legacy [text completion](https://platform.openai.com/docs/api-reference/completions) API is not
> supported, you should move to chat completion API. 2. May support other APIs such as fine-tuning, Assistants API etc.
> in the future.
Supported Amazon Bedrock models (Model IDs):
- anthropic.claude-instant-v1
- anthropic.claude-v2:1
- anthropic.claude-v2
- anthropic.claude-3-sonnet-20240229-v1:0
- anthropic.claude-3-haiku-20240307-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-7b-instruct-v0:2
- mistral.mixtral-8x7b-instruct-v0:1
> Note: The default model is set to `anthropic.claude-3-sonnet-20240229-v1:0`. You can change it via Lambda environment
> variables.
## Get Started
### Prerequisites
Please make sure you have met below prerequisites:
- Access to Amazon Bedrock foundation models.
If you haven't got model access, please follow
the [Set Up](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) guide
### Architecture
The following diagram illustrates the solution architecture. Note that it also includes a new **VPC** with two public
subnets only for the Application Load Balancer (ALB).
![Architecture](assets/arch.svg)
### Deployment
Please follow below steps to deploy the Bedrock Proxy APIs into your AWS account. Only support regions where Amazon
Bedrock is available (such as us-west-2). The deployment will take approximately 3-5 minutes.
**Step 1: Create your own custom API key (Optional)**
> NOTE: This step is to use any string (without spaces) you like to create a custom API Key (credential) that will be
> used to access the proxy API later. This key does not have to match your actual OpenAI key, and you don't even need to
> have an OpenAI API key. It is recommended that you take this step and ensure that you keep the key safe and private.
1. Open the AWS Management Console and navigate to the Systems Manager service.
2. In the left-hand navigation pane, click on "Parameter Store".
3. Click on the "Create parameter" button.
4. In the "Create parameter" window, select the following options:
- Name: Enter a descriptive name for your parameter (e.g., "BedrockProxyAPIKey").
- Description: Optionally, provide a description for the parameter.
- Tier: Select **Standard**.
- Type: Select **SecureString**.
- Value: Any string (without spaces).
5. Click "Create parameter".
6. Make a note of the parameter name you used (e.g., "BedrockProxyAPIKey"). You'll need this in the next step.
**Step 2: Deploy the CloudFormation stack**
1. Sign in to AWS Management Console, switch to the region to deploy the CloudFormation Stack to.
2. Click the following button to launch the CloudFormation Stack in that region.
[![Launch Stack](assets/launch-stack.png)](https://console.aws.amazon.com/cloudformation/home#/stacks/create/template?stackName=BedrockProxyAPI&templateURL=https://aws-gcr-solutions.s3.amazonaws.com/bedrock-proxy-api/latest/BedrockProxy.template)
3. Click "Next".
4. On the "Specify stack details" page, provide the following information:
- Stack name: Change the stack name if needed.
- ApiKeyParam (if you set up an API key in Step 1): Enter the parameter name you used for storing the API key (
e.g., "BedrockProxyAPIKey"). If you did not set up an API key, leave this field blank.
Click "Next".
5. On the "Configure stack options" page, you can leave the default settings or customize them according to your needs.
6. Click "Next".
7. On the "Review" page, review the details of the stack you're about to create. Check the "I acknowledge that AWS
CloudFormation might create IAM resources" checkbox at the bottom.
8. Click "Create stack".
That is it! Once deployed, click the CloudFormation stack and go to **Outputs** tab, you can find the API Base URL
from `APIBaseUrl`, the value should look like `http://xxxx.xxx.elb.amazonaws.com/api/v1`.
### SDK/API Usage
All you need is the API Key and the API Base URL. And if you didn't
set up your own key, then the default API Key `bedrock` will be used.
Now, you can try out the proxy APIs. Let's say you want to test Claude 3 Sonnet model, then
use `anthropic.claude-3-sonnet-20240229-v1:0` as the Model ID.
- **Example API Usage**
```bash
curl https://<API base url>/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <API Key>" \
-d '{
"model": "anthropic.claude-3-sonnet-20240229-v1:0",
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
}'
```
- **Example SDK Usage**
```bash
export OPENAI_API_KEY=<API key>
export OPENAI_API_BASE=<API base url>
```
```python
from openai import OpenAI
client = OpenAI()
completion = client.chat.completions.create(
model="anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)
```
## Other Examples
### AutoGen
Below is an image of setting up the model in AutoGen studio.
![AutoGen Model](assets/autogen-model.png)
### LangChain
Make sure you use `ChatOpenAI(...)` instead of `OpenAI(...)`
```python
# pip install langchain-openai
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
chat = ChatOpenAI(
model="anthropic.claude-3-sonnet-20240229-v1:0",
temperature=0,
openai_api_key="xxxx",
openai_api_base="http://xxx.elb.amazonaws.com/api/v1",
)
template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)
llm_chain = LLMChain(prompt=prompt, llm=chat)
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
response = llm_chain.invoke(question)
print(response)
```
## FAQs
### About Privacy
This application does not collect any of your data. Furthermore, it does not log any requests or responses by default.
### Why not use API Gateway instead of Application Load Balancer?
Short answer is that API Gateway does not support server-sent events (SSE) for streaming response.
### Which regions are supported?
This solution only supports the regions where Amazon Bedrock is available, so:
- US East (N. Virginia)
- US West (Oregon)
- Asia Pacific (Singapore)
- Asia Pacific (Tokyo)
- Europe (Frankfurt)
Note that not all models are available in those regions.
### Can I build and use my own ECR image
Yes, you can clone the repo and build the container image by yourself (src/Dockerfile) and then push to your ECR repo.
Replace the repo url in the CloudFormation template before you deploy.
### Can I run this locally
Yes, you can run this locally, then the API base url should be like `http://localhost:8000/api/v1`
### Any performance sacrifice or latency increase by using the proxy APIs
This is yet to be tested. But you should use this solution for PoC only.
### Any plan to support SageMaker models?
Currently, there is no plan of supporting SageMaker models. This depends on if there are customer asks.
### Any plan to support Bedrock custom models?
Fine-tuned models and models with Provisioned Throughput are not supported. You can clone the repo and make the
customization if needed.
### How to upgrade?
If there is no changes on architecture, you can simply deploy the latest image to your Lambda to use the new
features (manually) without redeploying the whole CloudFormation stack.
## Security

236
README_CN.md Normal file
View File

@@ -0,0 +1,236 @@
[English](./README.md)
# Bedrock Access Gateway
使用兼容OpenAI的API访问Amazon Bedrock
## 概述
Amazon Bedrock提供了广泛的基础模型(如Claude 3 Sonnet/Haiku、Llama 2、Mistral/Mixtral等)
,以及构建生成式AI应用程序的多种功能。更多详细信息,请查看[Amazon Bedrock](https://aws.amazon.com/bedrock)。
有时,您可能已经使用OpenAI的API或SDK构建了应用程序,并希望在不修改代码的情况下试用Amazon
Bedrock的模型。或者,您可能只是希望在AutoGen等工具中评估这些基础模型的功能。 好消息是, 这里提供了一种方便的途径,让您可以使用
OpenAI 的 API 或 SDK 无缝集成并试用 Amazon Bedrock 的模型,而无需对现有代码进行修改。
如果您觉得这个项目有用,请考虑给它点一个免费的小星星。
功能列表:
- [x] 支持 server-sent events (SSE)的流式响应
- [x] 支持 Model APIs
- [x] 支持 Chat Completion APIs
- [ ] 支持 Function Call/Tool Call
- [ ] 支持 Embedding APIs
- [ ] 支持 Image APIs
> 注意: 1. 不支持旧的 [text completion](https://platform.openai.com/docs/api-reference/completions) API,请更改为使用Chat
> Completion API。 2.未来可能支持其他API, 如Fine-tune、Assistants API等。
支持的Amazon Bedrock模型列表(Model IDs):
- anthropic.claude-instant-v1
- anthropic.claude-v2:1
- anthropic.claude-v2
- anthropic.claude-3-sonnet-20240229-v1:0
- anthropic.claude-3-haiku-20240307-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-7b-instruct-v0:2
- mistral.mixtral-8x7b-instruct-v0:1
> Note: 默认模型为 `anthropic.claude-3-sonnet-20240229-v1:0` 可以通过更改Lambda环境变量进行更改。
## 使用指南
### 前提条件
请确保您已满足以下先决条件:
- 可以访问Amazon Bedrock基础模型。
如果您还没有获得模型访问权限,请参考[配置](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html)指南。
### 架构图
下图展示了本方案的架构。请注意,它还包括一个新的**VPC**,其中只有两个公共子网用于应用程序负载均衡器(ALB)。
![Architecture](assets/arch.svg)
### 部署
请按以下步骤将Bedrock代理API部署到您的AWS账户中。仅支持Amazon Bedrock可用的区域(如us-west-2)。
**第一步: 自定义您的API Key (可选)**
> 注意:这一步是使用任意字符串不带空格创建一个自定义的API Key(凭证),将用于后续访问代理API。此API Key不必与您实际的OpenAI
> Key一致,您甚至无需拥有OpenAI API Key。建议您执行此步操作并且请确保保管好此API Key。
1. 打开AWS管理控制台,导航到Systems Manager服务。
2. 在左侧导航窗格中,单击"参数存储"。
3. 单击"创建参数"按钮。
4. 在"创建参数"窗口中,选择以下选项:
- 名称:输入参数的描述性名称(例如"BedrockProxyAPIKey")。
- 描述:可选,为参数提供描述。
- 层级:选择**标准**。
- 类型:选择**SecureString**。
- 值: 随意字符串(不带空格)。
5. 单击"创建参数"。
6. 记录您使用的参数名称(例如"BedrockProxyAPIKey")。您将在下一步中需要它。
**第二步: 部署CloudFormation堆栈**
1. 登录AWS管理控制台,切换到要部署CloudFormation堆栈的区域。
2. 单击以下按钮在该区域启动CloudFormation堆栈。
[![Launch Stack](assets/launch-stack.png)](https://console.aws.amazon.com/cloudformation/home#/stacks/create/template?stackName=BedrockProxyAPI&templateURL=https://aws-gcr-solutions.s3.amazonaws.com/bedrock-proxy-api/latest/BedrockProxy.template)
3. 单击"下一步"。
4. 在"指定堆栈详细信息"页面,提供以下信息:
- 堆栈名称: 可以根据需要更改名称。
- ApiKeyParam(如果在步骤1中设置了API密钥):输入您用于存储API密钥的参数名称(例如"BedrockProxyAPIKey")
。如果您没有设置API密钥,请将此字段留空。
单击"下一步"。
5. 在"配置堆栈选项"页面,您可以保留默认设置或根据需要进行自定义。
6. 单击"下一步"。
7. 在"审核"页面,查看您即将创建的堆栈详细信息。勾选底部的"我确认AWS CloudFormation 可能创建 IAM 资源。"复选框。
8. 单击"创建堆栈"。
仅此而已。部署完成后,点击CloudFormation堆栈,进入"输出"选项卡,你可以从"APIBaseUrl"
中找到API Base URL,它应该类似于`http://xxxx.xxx.elb.amazonaws.com/api/v1` 这样的格式。
### SDK/API使用
你只需要API Key和API Base URL。如果你没有设置自己的密钥,那么默认将使用API Key `bedrock`
现在,你可以尝试使用代理API了。假设你想测试Claude 3 Sonnet模型,那么使用"anthropic.claude-3-sonnet-20240229-v1:0"作为模型ID。
- **API 使用示例**
```bash
curl https://<API base url>/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <API Key>" \
-d '{
"model": "anthropic.claude-3-sonnet-20240229-v1:0",
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
}'
```
- **SDK 使用示例**
```bash
export OPENAI_API_KEY=<API key>
export OPENAI_API_BASE=<API base url>
```
```python
from openai import OpenAI
client = OpenAI()
completion = client.chat.completions.create(
model="anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)
```
## 其他例子
### AutoGen
例如在AutoGen studio配置和使用模型
![AutoGen Model](assets/autogen-model.png)
### LangChain
请确保使用的是`ChatOpenAI(...)`,而不是`OpenAI(...)`
```python
# pip install langchain-openai
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
chat = ChatOpenAI(
model="anthropic.claude-3-sonnet-20240229-v1:0",
temperature=0,
openai_api_key="xxxx",
openai_api_base="http://xxx.elb.amazonaws.com/api/v1",
)
template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)
llm_chain = LLMChain(prompt=prompt, llm=chat)
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
response = llm_chain.invoke(question)
print(response)
```
## FAQs
### 关于隐私
这个方案不会收集您的任何数据。而且,它默认情况下也不会记录任何请求或响应。
### 为什么没有使用API Gateway 而是使用了Application Load Balancer?
简单的答案是API Gateway不支持 server-sent events (SSE) 用于流式响应。
### 支持哪些区域?
只支持Amazon Bedrock可用的区域,即:
- 美国东部(弗吉尼亚北部)
- 美国西部(俄勒冈州)
- 亚太地区(新加坡)
- 亚太地区(东京)
- 欧洲(法兰克福)
注意,并非所有模型都在上述区域可用。
### 我可以构建并使用自己的ECR镜像吗?
是的,你可以克隆repo并自行构建容器镜像(src/Dockerfile),然后推送到你自己的ECR仓库。
在部署之前,请在CloudFormation模板中替换镜像仓库URL。
### 我可以在本地运行吗?
是的,你可以在本地运行,那么API Base URL应该类似于`http://localhost:8000/api/v1`
### 使用代理API会有任何性能牺牲或延迟增加吗?
这还有待测试。但你应该只将此解决方案用于概念验证。
### 有计划支持SageMaker模型吗?
目前没有支持SageMaker模型的计划。这取决于是否有客户需求。
### 有计划支持Bedrock自定义模型吗?
不支持微调模型和设置了已预配吞吐量的模型。如有需要,你可以克隆repo并进行自定义。
### 如何升级?
如果架构没有变化,你可以简单地将最新镜像部署到Lambda中,以使用新功能(手动),而无需重新部署整个CloudFormation堆栈。
## 安全
更多信息,请参阅[CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications)。
## 许可证
本项目根据MIT-0许可证获得许可。请参阅LICENSE文件。

4
assets/arch.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 25 KiB

BIN
assets/autogen-agent.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 209 KiB

BIN
assets/autogen-model.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 212 KiB

BIN
assets/launch-stack.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 KiB

View File

@@ -0,0 +1,805 @@
{
"Parameters": {
"ApiKeyParam": {
"Type": "String",
"Default": "",
"Description": "The parameter name in System Manager used to store the API Key, leave blank to use a default key"
}
},
"Resources": {
"VPCB9E5F0B4": {
"Type": "AWS::EC2::VPC",
"Properties": {
"CidrBlock": "10.250.0.0/16",
"EnableDnsHostnames": true,
"EnableDnsSupport": true,
"InstanceTenancy": "default",
"Tags": [
{
"Key": "Name",
"Value": "BedrockProxy/VPC"
}
]
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/Resource"
}
},
"VPCPublicSubnet1SubnetB4246D30": {
"Type": "AWS::EC2::Subnet",
"Properties": {
"AvailabilityZone": {
"Fn::Select": [
0,
{
"Fn::GetAZs": ""
}
]
},
"CidrBlock": "10.250.0.0/24",
"MapPublicIpOnLaunch": true,
"Tags": [
{
"Key": "aws-cdk:subnet-name",
"Value": "Public"
},
{
"Key": "aws-cdk:subnet-type",
"Value": "Public"
},
{
"Key": "Name",
"Value": "BedrockProxy/VPC/PublicSubnet1"
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet1/Subnet"
}
},
"VPCPublicSubnet1RouteTableFEE4B781": {
"Type": "AWS::EC2::RouteTable",
"Properties": {
"Tags": [
{
"Key": "Name",
"Value": "BedrockProxy/VPC/PublicSubnet1"
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet1/RouteTable"
}
},
"VPCPublicSubnet1RouteTableAssociation0B0896DC": {
"Type": "AWS::EC2::SubnetRouteTableAssociation",
"Properties": {
"RouteTableId": {
"Ref": "VPCPublicSubnet1RouteTableFEE4B781"
},
"SubnetId": {
"Ref": "VPCPublicSubnet1SubnetB4246D30"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet1/RouteTableAssociation"
}
},
"VPCPublicSubnet1DefaultRoute91CEF279": {
"Type": "AWS::EC2::Route",
"Properties": {
"DestinationCidrBlock": "0.0.0.0/0",
"GatewayId": {
"Ref": "VPCIGWB7E252D3"
},
"RouteTableId": {
"Ref": "VPCPublicSubnet1RouteTableFEE4B781"
}
},
"DependsOn": [
"VPCVPCGW99B986DC"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet1/DefaultRoute"
}
},
"VPCPublicSubnet2Subnet74179F39": {
"Type": "AWS::EC2::Subnet",
"Properties": {
"AvailabilityZone": {
"Fn::Select": [
1,
{
"Fn::GetAZs": ""
}
]
},
"CidrBlock": "10.250.1.0/24",
"MapPublicIpOnLaunch": true,
"Tags": [
{
"Key": "aws-cdk:subnet-name",
"Value": "Public"
},
{
"Key": "aws-cdk:subnet-type",
"Value": "Public"
},
{
"Key": "Name",
"Value": "BedrockProxy/VPC/PublicSubnet2"
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet2/Subnet"
}
},
"VPCPublicSubnet2RouteTable6F1A15F1": {
"Type": "AWS::EC2::RouteTable",
"Properties": {
"Tags": [
{
"Key": "Name",
"Value": "BedrockProxy/VPC/PublicSubnet2"
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet2/RouteTable"
}
},
"VPCPublicSubnet2RouteTableAssociation5A808732": {
"Type": "AWS::EC2::SubnetRouteTableAssociation",
"Properties": {
"RouteTableId": {
"Ref": "VPCPublicSubnet2RouteTable6F1A15F1"
},
"SubnetId": {
"Ref": "VPCPublicSubnet2Subnet74179F39"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet2/RouteTableAssociation"
}
},
"VPCPublicSubnet2DefaultRouteB7481BBA": {
"Type": "AWS::EC2::Route",
"Properties": {
"DestinationCidrBlock": "0.0.0.0/0",
"GatewayId": {
"Ref": "VPCIGWB7E252D3"
},
"RouteTableId": {
"Ref": "VPCPublicSubnet2RouteTable6F1A15F1"
}
},
"DependsOn": [
"VPCVPCGW99B986DC"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet2/DefaultRoute"
}
},
"VPCIGWB7E252D3": {
"Type": "AWS::EC2::InternetGateway",
"Properties": {
"Tags": [
{
"Key": "Name",
"Value": "BedrockProxy/VPC"
}
]
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/IGW"
}
},
"VPCVPCGW99B986DC": {
"Type": "AWS::EC2::VPCGatewayAttachment",
"Properties": {
"InternetGatewayId": {
"Ref": "VPCIGWB7E252D3"
},
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/VPCGW"
}
},
"ProxyApiHandlerServiceRoleBE71BFB1": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Statement": [
{
"Action": "sts:AssumeRole",
"Effect": "Allow",
"Principal": {
"Service": "lambda.amazonaws.com"
}
}
],
"Version": "2012-10-17"
},
"ManagedPolicyArns": [
{
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
]
]
}
]
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ApiHandler/ServiceRole/Resource"
}
},
"ProxyApiHandlerServiceRoleDefaultPolicy86681202": {
"Type": "AWS::IAM::Policy",
"Properties": {
"PolicyDocument": {
"Statement": [
{
"Action": [
"bedrock:InvokeModel",
"bedrock:InvokeModelWithResponseStream"
],
"Effect": "Allow",
"Resource": "arn:aws:bedrock:*::foundation-model/*"
},
{
"Action": [
"ssm:DescribeParameters",
"ssm:GetParameters",
"ssm:GetParameter",
"ssm:GetParameterHistory"
],
"Effect": "Allow",
"Resource": {
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":ssm:",
{
"Ref": "AWS::Region"
},
":",
{
"Ref": "AWS::AccountId"
},
":parameter/",
{
"Ref": "ApiKeyParam"
}
]
]
}
}
],
"Version": "2012-10-17"
},
"PolicyName": "ProxyApiHandlerServiceRoleDefaultPolicy86681202",
"Roles": [
{
"Ref": "ProxyApiHandlerServiceRoleBE71BFB1"
}
]
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ApiHandler/ServiceRole/DefaultPolicy/Resource"
}
},
"ProxyApiHandlerEC15A492": {
"Type": "AWS::Lambda::Function",
"Properties": {
"Architectures": [
"arm64"
],
"Code": {
"ImageUri": {
"Fn::Join": [
"",
[
{
"Fn::Select": [
4,
{
"Fn::Split": [
":",
{
"Fn::FindInMap": [
"ProxyRegionTable03E5BEB3",
{
"Ref": "AWS::Region"
},
"repoArn"
]
}
]
}
]
},
".dkr.ecr.",
{
"Fn::Select": [
3,
{
"Fn::Split": [
":",
{
"Fn::FindInMap": [
"ProxyRegionTable03E5BEB3",
{
"Ref": "AWS::Region"
},
"repoArn"
]
}
]
}
]
},
".",
{
"Ref": "AWS::URLSuffix"
},
"/bedrock-proxy-api:latest"
]
]
}
},
"Description": "Bedrock Proxy API Handler",
"Environment": {
"Variables": {
"API_KEY_PARAM_NAME": {
"Ref": "ApiKeyParam"
},
"DEBUG": "false",
"DEFAULT_MODEL": {
"Fn::FindInMap": [
"ProxyRegionTable03E5BEB3",
{
"Ref": "AWS::Region"
},
"model"
]
}
}
},
"MemorySize": 1024,
"PackageType": "Image",
"Role": {
"Fn::GetAtt": [
"ProxyApiHandlerServiceRoleBE71BFB1",
"Arn"
]
},
"Timeout": 300
},
"DependsOn": [
"ProxyApiHandlerServiceRoleDefaultPolicy86681202",
"ProxyApiHandlerServiceRoleBE71BFB1"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ApiHandler/Resource"
}
},
"ProxyApiHandlerInvoke2UTWxhlfyqbT5FTn5jvgbLgjFfJwzswGk55DU1HYF6C33779": {
"Type": "AWS::Lambda::Permission",
"Properties": {
"Action": "lambda:InvokeFunction",
"FunctionName": {
"Fn::GetAtt": [
"ProxyApiHandlerEC15A492",
"Arn"
]
},
"Principal": "elasticloadbalancing.amazonaws.com"
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ApiHandler/Invoke2UTWxhlfyqbT5FTn--5jvgbLgj+FfJwzswGk55DU1H--Y="
}
},
"ProxyALB87756780": {
"Type": "AWS::ElasticLoadBalancingV2::LoadBalancer",
"Properties": {
"LoadBalancerAttributes": [
{
"Key": "deletion_protection.enabled",
"Value": "false"
}
],
"Scheme": "internet-facing",
"SecurityGroups": [
{
"Fn::GetAtt": [
"ProxyALBSecurityGroup0D6CA3DA",
"GroupId"
]
}
],
"Subnets": [
{
"Ref": "VPCPublicSubnet1SubnetB4246D30"
},
{
"Ref": "VPCPublicSubnet2Subnet74179F39"
}
],
"Type": "application"
},
"DependsOn": [
"VPCPublicSubnet1DefaultRoute91CEF279",
"VPCPublicSubnet1RouteTableAssociation0B0896DC",
"VPCPublicSubnet2DefaultRouteB7481BBA",
"VPCPublicSubnet2RouteTableAssociation5A808732"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ALB/Resource"
}
},
"ProxyALBSecurityGroup0D6CA3DA": {
"Type": "AWS::EC2::SecurityGroup",
"Properties": {
"GroupDescription": "Automatically created Security Group for ELB BedrockProxyALB1CE4CAD1",
"SecurityGroupEgress": [
{
"CidrIp": "255.255.255.255/32",
"Description": "Disallow all traffic",
"FromPort": 252,
"IpProtocol": "icmp",
"ToPort": 86
}
],
"SecurityGroupIngress": [
{
"CidrIp": "0.0.0.0/0",
"Description": "Allow from anyone on port 80",
"FromPort": 80,
"IpProtocol": "tcp",
"ToPort": 80
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ALB/SecurityGroup/Resource"
}
},
"ProxyALBListener933E9515": {
"Type": "AWS::ElasticLoadBalancingV2::Listener",
"Properties": {
"DefaultActions": [
{
"TargetGroupArn": {
"Ref": "ProxyALBListenerTargetsGroup187739FA"
},
"Type": "forward"
}
],
"LoadBalancerArn": {
"Ref": "ProxyALB87756780"
},
"Port": 80,
"Protocol": "HTTP"
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ALB/Listener/Resource"
}
},
"ProxyALBListenerTargetsGroup187739FA": {
"Type": "AWS::ElasticLoadBalancingV2::TargetGroup",
"Properties": {
"HealthCheckEnabled": false,
"TargetType": "lambda",
"Targets": [
{
"Id": {
"Fn::GetAtt": [
"ProxyApiHandlerEC15A492",
"Arn"
]
}
}
]
},
"DependsOn": [
"ProxyApiHandlerInvoke2UTWxhlfyqbT5FTn5jvgbLgjFfJwzswGk55DU1HYF6C33779"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ALB/Listener/TargetsGroup/Resource"
}
},
"CDKMetadata": {
"Type": "AWS::CDK::Metadata",
"Properties": {
"Analytics": "v2:deflate64:H4sIAAAAAAAA/1VRXW/CMAz8LbyHDMovAKZNSJtWFcTr5LpeZ0iTKHFAqOp/n1q+uief7y7ynZLp+WKhZxM4xylWx6nhUrdbATyq9Y/NIUBDQkHBOX63hJlu9x57aZ+vVZ5Kw7hNpSXpuScqXBLaQWnoyT+5ZYwOGYSdfZh7sLFCwZK8g9AZLrczt20pAvjbkBW1JUyB5fIeXPLDgTHRKcKgC/IusrhwWUEkZaApK9Dtq8MjhU0DNb0li/cIY5xTaDhGdrZTDI1uC3etMczcGcYh2hV1igxEYTQOqhIMWGRbnzLdLr03jEPLDwfVatAo9E//7WMfRyF789zxSN9BqEketUdr16mCoksBh6if4D3buodfSXy6fsrIsHa2Yhk6WleRPsSXUzbT87meTQ6ReRqSFW5IF9f5B/Z2H8goAgAA"
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/CDKMetadata/Default"
},
"Condition": "CDKMetadataAvailable"
}
},
"Mappings": {
"ProxyRegionTable03E5BEB3": {
"us-east-1": {
"repoArn": "arn:aws:ecr:us-east-1:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-3-sonnet-20240229-v1:0"
},
"us-west-2": {
"repoArn": "arn:aws:ecr:us-west-2:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-3-sonnet-20240229-v1:0"
},
"ap-southeast-1": {
"repoArn": "arn:aws:ecr:ap-southeast-1:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-v2"
},
"ap-northeast-1": {
"repoArn": "arn:aws:ecr:ap-northeast-1:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-v2:1"
},
"eu-central-1": {
"repoArn": "arn:aws:ecr:eu-central-1:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-v2:1"
}
}
},
"Outputs": {
"APIBaseUrl": {
"Description": "Proxy API Base URL (OPENAI_API_BASE)",
"Value": {
"Fn::Join": [
"",
[
"http://",
{
"Fn::GetAtt": [
"ProxyALB87756780",
"DNSName"
]
},
"/api/v1"
]
]
}
}
},
"Conditions": {
"CDKMetadataAvailable": {
"Fn::Or": [
{
"Fn::Or": [
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"af-south-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-east-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-northeast-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-northeast-2"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-south-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-southeast-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-southeast-2"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ca-central-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"cn-north-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"cn-northwest-1"
]
}
]
},
{
"Fn::Or": [
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-central-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-north-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-south-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-west-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-west-2"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-west-3"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"il-central-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"me-central-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"me-south-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"sa-east-1"
]
}
]
},
{
"Fn::Or": [
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"us-east-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"us-east-2"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"us-west-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"us-west-2"
]
}
]
}
]
}
}
}

9
src/Dockerfile Normal file
View File

@@ -0,0 +1,9 @@
# AWS Lambda base image with the Python 3.12 runtime.
FROM public.ecr.aws/lambda/python:3.12

# Application package ends up under the Lambda task root as ./api.
COPY ./api ./api
COPY requirements.txt .

# -U upgrades any pre-installed packages; --no-cache-dir keeps the image small.
RUN pip3 install -r requirements.txt -U --no-cache-dir

# Lambda handler: attribute "handler" in module api.app (the Mangum adapter).
CMD [ "api.app.handler" ]

0
src/api/__init__.py Normal file
View File

52
src/api/app.py Normal file
View File

@@ -0,0 +1,52 @@
import logging
import uvicorn
from fastapi import FastAPI
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import PlainTextResponse
from mangum import Mangum
from api.routers import model, chat
from api.setting import API_ROUTE_PREFIX, TITLE, DESCRIPTION, SUMMARY, VERSION
# FastAPI application metadata, pulled from central settings so the
# generated OpenAPI docs stay consistent with the deployment.
config = {
    "title": TITLE,
    "description": DESCRIPTION,
    "summary": SUMMARY,
    "version": VERSION,
}

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)

app = FastAPI(**config)

# Fully-open CORS so browser-based OpenAI SDK clients can call the proxy
# from any origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# All API routes are mounted under the shared prefix (e.g. /api/v1).
app.include_router(model.router, prefix=API_ROUTE_PREFIX)
app.include_router(chat.router, prefix=API_ROUTE_PREFIX)


@app.get("/health")
async def health():
    """For health check if needed"""
    return {"status": "OK"}


@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request, exc):
    # Surface request-validation failures as a plain-text 400 instead of
    # FastAPI's default 422 JSON body.
    return PlainTextResponse(str(exc), status_code=400)


# Lambda entry point: Mangum adapts ALB/API Gateway events to ASGI.
handler = Mangum(app)

if __name__ == "__main__":
    # Local development server (API base URL http://localhost:8000/api/v1).
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)

28
src/api/auth.py Normal file
View File

@@ -0,0 +1,28 @@
import os
from typing import Annotated
import boto3
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from api.setting import DEFAULT_API_KEYS
# Name of the SSM parameter that stores the API key; when unset, the
# built-in default key from settings is used instead.
api_key_param = os.environ.get("API_KEY_PARAM_NAME")
if api_key_param:
    # Resolve the key once at import time from SSM Parameter Store
    # (stored as SecureString, hence WithDecryption=True).
    ssm = boto3.client("ssm")
    api_key = ssm.get_parameter(Name=api_key_param, WithDecryption=True)["Parameter"][
        "Value"
    ]
else:
    api_key = DEFAULT_API_KEYS
security = HTTPBearer()
def api_key_auth(
    credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)]
):
    """Reject the request with 401 unless the bearer token matches the configured key."""
    supplied = credentials.credentials
    if supplied == api_key:
        return
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key"
    )

View File

@@ -0,0 +1 @@
from api.models.bedrock import ClaudeModel, SUPPORTED_BEDROCK_MODELS, get_model

391
src/api/models/bedrock.py Normal file
View File

@@ -0,0 +1,391 @@
import json
import logging
import uuid
from abc import ABC, abstractmethod
from typing import AsyncIterable
import boto3
from api.schema import (
ChatResponse,
ChatRequest,
ChatRequestMessage,
Choice,
ChatResponseMessage,
Usage,
ChatStreamResponse,
ChoiceDelta,
)
from api.setting import DEBUG, AWS_REGION
logger = logging.getLogger(__name__)
# Module-level Bedrock runtime client shared by all model adapters.
bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    region_name=AWS_REGION,
)
# Maps Bedrock model ids to human-readable family names; get_model() selects
# the adapter class by family name, and the /models endpoints expose the keys.
SUPPORTED_BEDROCK_MODELS = {
    "anthropic.claude-instant-v1": "Claude Instant",
    "anthropic.claude-v2:1": "Claude",
    "anthropic.claude-v2": "Claude",
    "anthropic.claude-3-sonnet-20240229-v1:0": "Claude 3 Sonnet",
    "anthropic.claude-3-haiku-20240307-v1:0": "Claude 3 Haiku",
    "meta.llama2-13b-chat-v1": "Llama 2 Chat 13B",
    "meta.llama2-70b-chat-v1": "Llama 2 Chat 70B",
    "mistral.mistral-7b-instruct-v0:2": "Mistral 7B Instruct",
    "mistral.mixtral-8x7b-instruct-v0:1": "Mixtral 8x7B Instruct",
}
class BaseChatModel(ABC):
    """Represent a basic chat model

    Currently, only Bedrock model is supported, but may be used for SageMaker models if needed.
    """

    @abstractmethod
    def chat(self, chat_request: ChatRequest) -> ChatResponse:
        """Handle a basic chat completion requests."""

    @abstractmethod
    def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
        """Handle a basic chat completion requests with stream response."""

    def _generate_message_id(self) -> str:
        # OpenAI-style short completion id, e.g. "chatcmpl-1a2b3c4d".
        return f"chatcmpl-{uuid.uuid4().hex[:8]}"

    def _stream_response_to_bytes(self, response: ChatStreamResponse) -> bytes:
        # Encode one chunk as a server-sent-events data frame.
        return f"data: {response.model_dump_json()}\n\n".encode("utf-8")
# https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html
class BedrockModel(BaseChatModel):
    """Shared plumbing for Bedrock-hosted chat models.

    Subclasses translate between the OpenAI-style schema and each model
    family's native request/response body.
    """

    accept = "application/json"
    content_type = "application/json"

    def _invoke_model(self, args: dict, model_id: str, with_stream: bool = False):
        """Invoke the Bedrock runtime with the given body, streaming when requested."""
        payload = json.dumps(args)
        if DEBUG:
            logger.info("Invoke Bedrock Model: " + model_id)
            logger.info("Bedrock request body: " + payload)
        invoke = (
            bedrock_runtime.invoke_model_with_response_stream
            if with_stream
            else bedrock_runtime.invoke_model
        )
        return invoke(
            body=payload,
            modelId=model_id,
            accept=self.accept,
            contentType=self.content_type,
        )

    def _create_response(
        self,
        model: str,
        message: str,
        message_id: str,
        input_tokens: int = 0,
        output_tokens: int = 0,
    ) -> ChatResponse:
        """Wrap a complete assistant message in an OpenAI-style ChatResponse."""
        response = ChatResponse(
            id=message_id,
            model=model,
            choices=[
                Choice(
                    index=0,
                    message=ChatResponseMessage(role="assistant", content=message),
                    finish_reason="stop",
                )
            ],
            usage=Usage(
                prompt_tokens=input_tokens,
                completion_tokens=output_tokens,
                total_tokens=input_tokens + output_tokens,
            ),
        )
        if DEBUG:
            logger.info("Proxy response :" + response.model_dump_json())
        return response

    def _create_response_stream(
        self, model: str, message_id: str, chunk_message: str, finish_reason: str | None
    ) -> ChatStreamResponse:
        """Wrap one streamed text delta in an OpenAI-style chunk response."""
        response = ChatStreamResponse(
            id=message_id,
            model=model,
            choices=[
                ChoiceDelta(
                    index=0,
                    delta=ChatResponseMessage(role="assistant", content=chunk_message),
                    finish_reason=finish_reason,
                )
            ],
        )
        if DEBUG:
            logger.info("Proxy response :" + response.model_dump_json())
        return response
def get_model(model_id: str) -> BedrockModel:
    """Resolve a Bedrock model id to the chat-model adapter for its family.

    Raises ValueError for ids absent from SUPPORTED_BEDROCK_MODELS.
    """
    model_name = SUPPORTED_BEDROCK_MODELS.get(model_id, "")
    if DEBUG:
        logger.info("model name is " + model_name)
    # Dispatch table keyed by family name instead of chained membership tests.
    adapter_by_family = {
        "Claude Instant": ClaudeModel,
        "Claude": ClaudeModel,
        "Claude 3 Sonnet": ClaudeModel,
        "Claude 3 Haiku": ClaudeModel,
        "Llama 2 Chat 13B": Llama2Model,
        "Llama 2 Chat 70B": Llama2Model,
        "Mistral 7B Instruct": MistralModel,
        "Mixtral 8x7B Instruct": MistralModel,
    }
    adapter_cls = adapter_by_family.get(model_name)
    if adapter_cls is None:
        logger.error("Unsupported model id " + model_id)
        raise ValueError("Invalid model ID")
    return adapter_cls()
class ClaudeModel(BedrockModel):
    """Chat model adapter for Anthropic Claude models on Bedrock (messages API)."""

    anthropic_version = "bedrock-2023-05-31"

    def _parse_args(self, chat_request: ChatRequest) -> dict:
        """Build the Anthropic messages-API request body from an OpenAI-style request."""
        messages = chat_request.messages
        args = {
            "anthropic_version": self.anthropic_version,
            "max_tokens": chat_request.max_tokens,
            "top_p": chat_request.top_p,
            "temperature": chat_request.temperature,
        }
        # Claude takes the system prompt as a dedicated top-level field,
        # not as an entry in the conversation list.
        if messages[0].role == "system":
            args["system"] = messages[0].content
            conversation = messages[1:]
        else:
            conversation = messages
        args["messages"] = [
            {"role": m.role, "content": m.content} for m in conversation
        ]
        return args

    def chat(self, chat_request: ChatRequest) -> ChatResponse:
        """Run a non-streaming completion and convert the reply to OpenAI format."""
        invoke_result = self._invoke_model(
            args=self._parse_args(chat_request), model_id=chat_request.model
        )
        body = json.loads(invoke_result.get("body").read())
        if DEBUG:
            logger.info("Bedrock response body: " + str(body))
        return self._create_response(
            model=chat_request.model,
            message=body["content"][0]["text"],
            message_id=body["id"],
            input_tokens=body["usage"]["input_tokens"],
            output_tokens=body["usage"]["output_tokens"],
        )

    def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
        """Yield SSE-encoded chunks converted from Claude's streaming events."""
        invoke_result = self._invoke_model(
            args=self._parse_args(chat_request),
            model_id=chat_request.model,
            with_stream=True,
        )
        message_id = ""
        for event in invoke_result.get("body"):
            if DEBUG:
                logger.info("Bedrock response chunk: " + str(event))
            chunk = json.loads(event["chunk"]["bytes"])
            chunk_type = chunk["type"]
            if chunk_type == "message_start":
                # Remember Claude's message id so every chunk carries it.
                message_id = chunk["message"]["id"]
                continue
            if chunk_type == "content_block_delta":
                text, finish = chunk["delta"]["text"], None
            elif chunk_type == "message_delta":
                # Final delta carries no text; signal normal completion.
                text, finish = "", "stop"
            else:
                # Ignore bookkeeping events (content_block_start/stop, etc.).
                continue
            yield self._stream_response_to_bytes(
                self._create_response_stream(
                    model=chat_request.model,
                    message_id=message_id,
                    chunk_message=text,
                    finish_reason=finish,
                )
            )
class Llama2Model(BedrockModel):
    """Chat model adapter for Meta Llama 2 chat models on Bedrock."""

    def _convert_prompt(self, messages: list[ChatRequestMessage]) -> str:
        """Create a prompt message follow below example:

        <s>[INST] <<SYS>>\n{your_system_message}\n<</SYS>>\n\n{user_message_1} [/INST] {model_reply_1}</s>
        <s>[INST] {user_message_2} [/INST]
        """
        if DEBUG:
            logger.info("Convert below messages to prompt for Llama 2: ")
            for msg in messages:
                logger.info(msg.model_dump_json())
        bos_token = "<s>"
        eos_token = "</s>"
        prompt = bos_token + "[INST] "
        start = 0
        end_turn = False
        if messages[0].role == "system":
            # Fix: the system block must be closed with <</SYS>>; the original
            # emitted a second opening <<SYS>> tag, producing a malformed prompt.
            prompt += "<<SYS>>\n" + messages[0].content + "\n<</SYS>>\n\n"
            start = 1
        # TODO: Add validation (e.g. strictly alternating user/assistant turns)
        for msg in messages[start:]:
            if msg.role == "user":
                if end_turn:
                    # A new user turn after an assistant reply opens a new [INST] block.
                    prompt += bos_token + "[INST] "
                prompt += msg.content + " [/INST] "
                end_turn = False
            else:
                prompt += msg.content + eos_token
                end_turn = True
        if DEBUG:
            logger.info("Converted prompt: " + prompt.replace("\n", "\\n"))
        return prompt

    def _parse_args(self, chat_request: ChatRequest) -> dict:
        """Build the native Llama 2 request body."""
        prompt = self._convert_prompt(chat_request.messages)
        return {
            "prompt": prompt,
            "max_gen_len": chat_request.max_tokens,
            "temperature": chat_request.temperature,
            "top_p": chat_request.top_p,
        }

    def chat(self, chat_request: ChatRequest) -> ChatResponse:
        """Run a non-streaming completion and convert the reply to OpenAI format."""
        response = self._invoke_model(
            args=self._parse_args(chat_request), model_id=chat_request.model
        )
        response_body = json.loads(response.get("body").read())
        if DEBUG:
            logger.info("Bedrock response body: " + str(response_body))
        # Llama 2 responses carry no message id, so mint an OpenAI-style one.
        message_id = self._generate_message_id()
        return self._create_response(
            model=chat_request.model,
            message=response_body["generation"],
            message_id=message_id,
            input_tokens=response_body["prompt_token_count"],
            output_tokens=response_body["generation_token_count"],
        )

    def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
        """Stream the completion as OpenAI-style SSE chunk responses."""
        response = self._invoke_model(
            args=self._parse_args(chat_request),
            model_id=chat_request.model,
            with_stream=True,
        )
        # Fix: mint one id up front and reuse it for every chunk; previously
        # msg_id stayed "" so all streamed chunks had an empty id.
        msg_id = self._generate_message_id()
        for event in response.get("body"):
            if DEBUG:
                logger.info("Bedrock response chunk: " + str(event))
            chunk = json.loads(event["chunk"]["bytes"])
            # stop_reason is null until the final chunk, matching OpenAI's
            # finish_reason semantics.
            stream_response = self._create_response_stream(
                model=chat_request.model,
                message_id=msg_id,
                chunk_message=chunk["generation"],
                finish_reason=chunk["stop_reason"],
            )
            yield self._stream_response_to_bytes(stream_response)
class MistralModel(BedrockModel):
    """Chat model adapter for Mistral/Mixtral instruct models on Bedrock."""

    def _convert_prompt(self, messages: list[ChatRequestMessage]) -> str:
        """Create a prompt message follow below example:

        <s>[INST] {your_system_message}\n{user_message_1} [/INST] {model_reply_1}</s>
        <s>[INST] {user_message_2} [/INST]
        """
        if DEBUG:
            # Fix: this adapter handles Mistral; the log previously said "Llama 2".
            logger.info("Convert below messages to prompt for Mistral: ")
            for msg in messages:
                logger.info(msg.model_dump_json())
        bos_token = "<s>"
        eos_token = "</s>"
        prompt = bos_token + "[INST] "
        start = 0
        end_turn = False
        # Mistral has no dedicated system slot; prepend it to the first turn.
        if messages[0].role == "system":
            prompt += messages[0].content + "\n"
            start = 1
        # TODO: Add validation (e.g. strictly alternating user/assistant turns)
        for msg in messages[start:]:
            if msg.role == "user":
                if end_turn:
                    # A new user turn after an assistant reply opens a new [INST] block.
                    prompt += bos_token + "[INST] "
                prompt += msg.content + " [/INST] "
                end_turn = False
            else:
                prompt += msg.content + eos_token
                end_turn = True
        if DEBUG:
            logger.info("Converted prompt: " + prompt.replace("\n", "\\n"))
        return prompt

    def _parse_args(self, chat_request: ChatRequest) -> dict:
        """Build the native Mistral request body."""
        prompt = self._convert_prompt(chat_request.messages)
        return {
            "prompt": prompt,
            "max_tokens": chat_request.max_tokens,
            "temperature": chat_request.temperature,
            "top_p": chat_request.top_p,
        }

    def chat(self, chat_request: ChatRequest) -> ChatResponse:
        """Run a non-streaming completion and convert the reply to OpenAI format."""
        response = self._invoke_model(
            args=self._parse_args(chat_request), model_id=chat_request.model
        )
        response_body = json.loads(response.get("body").read())
        if DEBUG:
            logger.info("Bedrock response body: " + str(response_body))
        message_id = self._generate_message_id()
        # NOTE(review): the Mistral response body exposes no token counts here,
        # so usage falls back to zeros.
        return self._create_response(
            model=chat_request.model,
            message=response_body["outputs"][0]["text"],
            message_id=message_id,
        )

    def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
        """Stream the completion as OpenAI-style SSE chunk responses."""
        response = self._invoke_model(
            args=self._parse_args(chat_request),
            model_id=chat_request.model,
            with_stream=True,
        )
        # Fix: mint one id up front and reuse it for every chunk; previously
        # msg_id stayed "" so all streamed chunks had an empty id.
        msg_id = self._generate_message_id()
        for event in response.get("body"):
            if DEBUG:
                logger.info("Bedrock response chunk: " + str(event))
            chunk = json.loads(event["chunk"]["bytes"])
            stream_response = self._create_response_stream(
                model=chat_request.model,
                message_id=msg_id,
                chunk_message=chunk["outputs"][0]["text"],
                finish_reason=chunk["outputs"][0]["stop_reason"],
            )
            yield self._stream_response_to_bytes(stream_response)

View File

51
src/api/routers/chat.py Normal file
View File

@@ -0,0 +1,51 @@
from typing import Annotated
from fastapi import APIRouter, Depends, Body, HTTPException
from fastapi.responses import StreamingResponse
from api.auth import api_key_auth
from api.models import get_model, SUPPORTED_BEDROCK_MODELS
from api.schema import ChatRequest, ChatResponse, ChatStreamResponse
from api.setting import DEFAULT_MODEL
# Fix: removed a dead `router = APIRouter()` that was immediately overwritten.
# All chat endpoints require a valid API key via the router-level dependency.
router = APIRouter(
    prefix="/chat",
    tags=["items"],
    dependencies=[Depends(api_key_auth)],
    # responses={404: {"description": "Not found"}},
)
@router.post("/completions", response_model=ChatResponse | ChatStreamResponse)
async def chat_completions(
    chat_request: Annotated[
        ChatRequest,
        Body(
            examples=[
                {
                    "model": "anthropic.claude-3-sonnet-20240229-v1:0",
                    "messages": [
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": "Hello!"},
                    ],
                }
            ],
        ),
    ]
):
    """OpenAI-compatible chat completions endpoint backed by Bedrock models.

    Returns a full ChatResponse, or an SSE stream of chunks when
    `stream` is true. Raises 400 for unknown model ids.
    """
    # Clients hard-coded to OpenAI model names (gpt-*) are transparently
    # redirected to the configured default Bedrock model.
    if chat_request.model.lower().startswith("gpt-"):
        chat_request.model = DEFAULT_MODEL

    # Membership test on the dict itself (no need to materialize .keys()).
    if chat_request.model not in SUPPORTED_BEDROCK_MODELS:
        raise HTTPException(
            status_code=400, detail="Unsupported Model Id " + chat_request.model
        )
    try:
        model = get_model(chat_request.model)
        if chat_request.stream:
            return StreamingResponse(
                content=model.chat_stream(chat_request), media_type="text/event-stream"
            )
        return model.chat(chat_request)
    except ValueError as e:
        # Chain the cause so the original error survives in logs/tracebacks.
        raise HTTPException(status_code=400, detail=str(e)) from e

41
src/api/routers/model.py Normal file
View File

@@ -0,0 +1,41 @@
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Path
from api.auth import api_key_auth
from api.models import SUPPORTED_BEDROCK_MODELS
from api.schema import Models, Model
# Fix: removed a dead `router = APIRouter()` that was immediately overwritten.
# All /models endpoints require a valid API key via the router-level dependency.
router = APIRouter(
    prefix="/models",
    tags=["items"],
    dependencies=[Depends(api_key_auth)],
    # responses={404: {"description": "Not found"}},
)
async def validate_model_id(model_id: str):
    """Raise HTTP 400 if model_id is not a supported Bedrock model."""
    # Membership test on the dict itself (no need to materialize .keys()).
    if model_id not in SUPPORTED_BEDROCK_MODELS:
        raise HTTPException(status_code=400, detail="Unsupported Model Id")
@router.get("/", response_model=Models)
async def list_models():
    """Return every supported Bedrock model id as an OpenAI-style model list."""
    return Models(data=[Model(id=mid) for mid in SUPPORTED_BEDROCK_MODELS])
@router.get(
    "/{model_id}",
    response_model=Model,
)
async def get_model(
    model_id: Annotated[
        str,
        Path(description="Model ID", example="anthropic.claude-3-sonnet-20240229-v1:0"),
    ]
):
    """Return details for a single model, raising 400 for unknown ids."""
    await validate_model_id(model_id)
    return Model(id=model_id)

80
src/api/schema.py Normal file
View File

@@ -0,0 +1,80 @@
import time
from typing import Literal
from pydantic import BaseModel, Field
class Model(BaseModel):
    """One entry in the OpenAI-compatible model list."""

    id: str
    # Unix timestamp captured when the instance is created.
    created: int = Field(default_factory=lambda: int(time.time()))
    object: str | None = "model"
    owned_by: str | None = "bedrock"
class Models(BaseModel):
    """OpenAI-compatible list-models response envelope."""

    object: str | None = "list"
    # Pydantic deep-copies field defaults, so the mutable [] default is safe here.
    data: list[Model] = []
class ChatRequestMessage(BaseModel):
    """A single incoming conversation message (OpenAI chat format)."""

    name: str | None = None
    role: Literal["user", "assistant", "system"]
    content: str
class ChatRequest(BaseModel):
    """OpenAI-compatible chat completion request body.

    Fields marked "Not used" are accepted for client compatibility but are
    not forwarded to Bedrock.
    """

    messages: list[ChatRequestMessage]
    model: str
    frequency_penalty: float | None = Field(default=0.0, le=2.0, ge=-2.0)  # Not used
    presence_penalty: float | None = Field(default=0.0, le=2.0, ge=-2.0)  # Not used
    stream: bool | None = False
    temperature: float | None = Field(default=1.0, le=2.0, ge=0.0)
    top_p: float | None = Field(default=1.0, le=1.0, ge=0.0)
    user: str | None = None  # Not used
    max_tokens: int | None = 2048
    n: int | None = 1  # Not used
class Usage(BaseModel):
    """Token accounting, mirroring OpenAI's usage object."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
class ChatResponseMessage(BaseModel):
    """Assistant message returned in a choice or streamed delta."""

    # tool_calls
    role: Literal["assistant"] | None = None
    content: str | None = None
class BaseChoice(BaseModel):
    """Fields shared by streaming and non-streaming choices."""

    index: int
    # None while a stream is still producing output; e.g. "stop" when done.
    finish_reason: str | None
    logprobs: dict | None = None
class Choice(BaseChoice):
    """Non-streaming choice carrying the complete assistant message."""

    message: ChatResponseMessage
class ChoiceDelta(BaseChoice):
    """Streaming choice carrying one incremental message delta."""

    delta: ChatResponseMessage
class BaseChatResponse(BaseModel):
    """Fields shared by streaming and non-streaming chat responses."""

    # id: str = Field(default_factory=lambda: "chatcmpl-" + str(uuid.uuid4())[:8])
    id: str
    # Unix timestamp captured when the instance is created.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    # Static placeholder; OpenAI uses this field to identify the backend build.
    system_fingerprint: str = "fp_e97c09dd4e26"
class ChatResponse(BaseChatResponse):
    """Complete (non-streaming) chat completion response."""

    choices: list[Choice]
    object: Literal["chat.completion"] = "chat.completion"
    usage: Usage
class ChatStreamResponse(BaseChatResponse):
    """One chunk of a streaming chat completion response."""

    choices: list[ChoiceDelta]
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"

27
src/api/setting.py Normal file
View File

@@ -0,0 +1,27 @@
import os

# Fallback API key used when no SSM parameter name is configured (see api.auth).
DEFAULT_API_KEYS = "bedrock"
# Common prefix under which the model and chat routers are mounted.
API_ROUTE_PREFIX = "/api/v1"
# OpenAPI metadata shown in the generated docs pages.
TITLE = "Amazon Bedrock Proxy APIs"
SUMMARY = "OpenAI-Compatible RESTful APIs for Amazon Bedrock"
VERSION = "0.1.0"
DESCRIPTION = """
Use OpenAI-Compatible RESTful APIs for Amazon Bedrock models.
List of Amazon Bedrock models currently supported:
- anthropic.claude-instant-v1
- anthropic.claude-v2:1
- anthropic.claude-v2
- anthropic.claude-3-sonnet-20240229-v1:0
- anthropic.claude-3-haiku-20240307-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-7b-instruct-v0:2
- mistral.mixtral-8x7b-instruct-v0:1
"""
# NOTE(review): DEBUG is enabled for ANY value other than the literal "false"
# (e.g. DEBUG=0 still turns it on) — confirm this is intended.
DEBUG = os.environ.get("DEBUG", "false").lower() != "false"
AWS_REGION = os.environ.get("AWS_REGION", "us-west-2")
# Model substituted when a client requests an OpenAI "gpt-*" model name.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "anthropic.claude-3-sonnet-20240229-v1:0")

4
src/requirements.txt Normal file
View File

@@ -0,0 +1,4 @@
fastapi==0.103.0
pydantic==2.6.3
uvicorn==0.27.0.post1
mangum==0.17.0