Initial commit

This commit is contained in:
Aiden Dai
2024-03-27 15:20:24 +08:00
parent f77df2c536
commit f974cb2728
21 changed files with 2149 additions and 5 deletions

19
.flake8 Normal file
View File

@@ -0,0 +1,19 @@
[flake8]
max-line-length = 120
ignore =
E203,W191,W503
exclude =
build
.git
__pycache__
.tox
venv
.venv
.venv-test
tmp*
deployment
cdk.out
node_modules
max-complexity = 10
require-code = True

162
.gitignore vendored Normal file
View File

@@ -0,0 +1,162 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
Config

244
README.md
View File

@@ -1,11 +1,245 @@
## My Project [中文](./README_CN.md)
TODO: Fill this README out! # Bedrock Access Gateway
Be sure to: OpenAI-Compatible RESTful APIs for Amazon Bedrock
* Change the title in this README ## Overview
* Edit your repository description on GitHub
Amazon Bedrock offers a wide range of foundation models (such as Claude 3 Sonnet/Haiku, Llama 2, Mistral/Mixtral etc.)
and a broad set of capabilities for you to build generative AI applications.
Check [Amazon Bedrock](https://aws.amazon.com/bedrock) for more details.
Sometimes, you might have applications developed using OpenAI APIs or SDKs, and you want to experiment with Amazon
Bedrock without modifying your codebase. Or you may simply wish to evaluate the capabilities of these foundation models
in tools like AutoGen etc. Well, this repository allows you to access Amazon Bedrock models seamlessly through OpenAI
APIs and SDKs, enabling you to test these models without code changes.
If you find this GitHub repository useful, please consider giving it a free star to show your appreciation and support
for the project.
Features:
- [x] Support streaming response via server-sent events (SSE)
- [x] Support Model APIs
- [x] Support Chat Completion APIs
- [ ] Support Function Call/Tool Call
- [ ] Support Embedding APIs
- [ ] Support Image APIs
> NOTE: 1. The legacy [text completion](https://platform.openai.com/docs/api-reference/completions) API is not
> supported, you should move to chat completion API. 2. May support other APIs such as fine-tuning, Assistants API etc.
> in the future.
Supported Amazon Bedrock models (Model IDs):
- anthropic.claude-instant-v1
- anthropic.claude-v2:1
- anthropic.claude-v2
- anthropic.claude-3-sonnet-20240229-v1:0
- anthropic.claude-3-haiku-20240307-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-7b-instruct-v0:2
- mistral.mixtral-8x7b-instruct-v0:1
> Note: The default model is set to `anthropic.claude-3-sonnet-20240229-v1:0`. You can change it via Lambda environment
> variables.
## Get Started
### Prerequisites
Please make sure you have met below prerequisites:
- Access to Amazon Bedrock foundation models.
If you haven't got model access, please follow
the [Set Up](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) guide
### Architecture
The following diagram illustrates the solution architecture. Note that it also includes a new **VPC** with two public
subnets only for the Application Load Balancer (ALB).
![Architecture](assets/arch.svg)
### Deployment
Please follow below steps to deploy the Bedrock Proxy APIs into your AWS account. Only support regions where Amazon
Bedrock is available (such as us-west-2). The deployment will take approximately 3-5 minutes.
**Step 1: Create your own custom API key (Optional)**
> NOTE: This step is to use any string (without spaces) you like to create a custom API Key (credential) that will be
> used to access the proxy API later. This key does not have to match your actual OpenAI key, and you don't even need to
> have an OpenAI API key. It is recommended that you take this step and ensure that you keep the key safe and private.
1. Open the AWS Management Console and navigate to the Systems Manager service.
2. In the left-hand navigation pane, click on "Parameter Store".
3. Click on the "Create parameter" button.
4. In the "Create parameter" window, select the following options:
- Name: Enter a descriptive name for your parameter (e.g., "BedrockProxyAPIKey").
- Description: Optionally, provide a description for the parameter.
- Tier: Select **Standard**.
- Type: Select **SecureString**.
- Value: Any string (without spaces).
5. Click "Create parameter".
6. Make a note of the parameter name you used (e.g., "BedrockProxyAPIKey"). You'll need this in the next step.
**Step 2: Deploy the CloudFormation stack**
1. Sign in to AWS Management Console, switch to the region to deploy the CloudFormation Stack to.
2. Click the following button to launch the CloudFormation Stack in that region.
[![Launch Stack](assets/launch-stack.png)](https://console.aws.amazon.com/cloudformation/home#/stacks/create/template?stackName=BedrockProxyAPI&templateURL=https://aws-gcr-solutions.s3.amazonaws.com/bedrock-proxy-api/latest/BedrockProxy.template)
3. Click "Next".
4. On the "Specify stack details" page, provide the following information:
- Stack name: Change the stack name if needed.
- ApiKeyParam (if you set up an API key in Step 1): Enter the parameter name you used for storing the API key (
e.g., "BedrockProxyAPIKey"). If you did not set up an API key, leave this field blank.
Click "Next".
5. On the "Configure stack options" page, you can leave the default settings or customize them according to your needs.
6. Click "Next".
7. On the "Review" page, review the details of the stack you're about to create. Check the "I acknowledge that AWS
CloudFormation might create IAM resources" checkbox at the bottom.
8. Click "Create stack".
That is it! Once deployed, click the CloudFormation stack and go to **Outputs** tab, you can find the API Base URL
from `APIBaseUrl`, the value should look like `http://xxxx.xxx.elb.amazonaws.com/api/v1`.
### SDK/API Usage
All you need is the API Key and the API Base URL. And if you didn't
set up your own key, then the default API Key `bedrock` will be used.
Now, you can try out the proxy APIs. Let's say you want to test Claude 3 Sonnet model, then
use `anthropic.claude-3-sonnet-20240229-v1:0` as the Model ID.
- **Example API Usage**
```bash
curl https://<API base url>/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <API Key>" \
-d '{
"model": "anthropic.claude-3-sonnet-20240229-v1:0",
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
}'
```
- **Example SDK Usage**
```bash
export OPENAI_API_KEY=<API key>
export OPENAI_API_BASE=<API base url>
```
```python
from openai import OpenAI
client = OpenAI()
completion = client.chat.completions.create(
model="anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)
```
## Other Examples
### AutoGen
Below is an image of setting up the model in AutoGen studio.
![AutoGen Model](assets/autogen-model.png)
### LangChain
Make sure you use `ChatOpenAI(...)` instead of `OpenAI(...)`
```python
# pip install langchain-openai
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
chat = ChatOpenAI(
model="anthropic.claude-3-sonnet-20240229-v1:0",
temperature=0,
openai_api_key="xxxx",
openai_api_base="http://xxx.elb.amazonaws.com/api/v1",
)
template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)
llm_chain = LLMChain(prompt=prompt, llm=chat)
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
response = llm_chain.invoke(question)
print(response)
```
## FAQs
### About Privacy
This application does not collect any of your data. Furthermore, it does not log any requests or responses by default.
### Why not use API Gateway instead of Application Load Balancer?
Short answer is that API Gateway does not support server-sent events (SSE) for streaming response.
### Which regions are supported?
This solution only supports the regions where Amazon Bedrock is available, so:
- US East (N. Virginia)
- US West (Oregon)
- Asia Pacific (Singapore)
- Asia Pacific (Tokyo)
- Europe (Frankfurt)
Note that not all models are available in those regions.
### Can I build and use my own ECR image
Yes, you can clone the repo and build the container image by yourself (src/Dockerfile) and then push to your ECR repo.
Replace the repo url in the CloudFormation template before you deploy.
### Can I run this locally
Yes, you can run this locally, then the API base url should be like `http://localhost:8000/api/v1`
### Any performance sacrifice or latency increase by using the proxy APIs
This is yet to be tested. But you should use this solution for PoC only.
### Any plan to support SageMaker models?
Currently, there is no plan of supporting SageMaker models. This depends on if there are customer asks.
### Any plan to support Bedrock custom models?
Fine-tuned models and models with Provisioned Throughput are not supported. You can clone the repo and make the
customization if needed.
### How to upgrade?
If there is no changes on architecture, you can simply deploy the latest image to your Lambda to use the new
features (manually) without redeploying the whole CloudFormation stack.
## Security

236
README_CN.md Normal file
View File

@@ -0,0 +1,236 @@
[English](./README.md)
# Bedrock Access Gateway
使用兼容OpenAI的API访问Amazon Bedrock
## 概述
Amazon Bedrock提供了广泛的基础模型(如Claude 3 Sonnet/Haiku、Llama 2、Mistral/Mixtral等)
,以及构建生成式AI应用程序的多种功能。更多详细信息,请查看[Amazon Bedrock](https://aws.amazon.com/bedrock)。
有时,您可能已经使用OpenAI的API或SDK构建了应用程序,并希望在不修改代码的情况下试用Amazon
Bedrock的模型。或者,您可能只是希望在AutoGen等工具中评估这些基础模型的功能。 好消息是, 这里提供了一种方便的途径,让您可以使用
OpenAI 的 API 或 SDK 无缝集成并试用 Amazon Bedrock 的模型,而无需对现有代码进行修改。
如果您觉得这个项目有用,请考虑给它点一个免费的小星星。
功能列表:
- [x] 支持 server-sent events (SSE)的流式响应
- [x] 支持 Model APIs
- [x] 支持 Chat Completion APIs
- [ ] 支持 Function Call/Tool Call
- [ ] 支持 Embedding APIs
- [ ] 支持 Image APIs
> 注意: 1. 不支持旧的 [text completion](https://platform.openai.com/docs/api-reference/completions) API,请更改为使用Chat
> Completion API。 2.未来可能支持其他API, 如Fine-tune、Assistants API等。
支持的Amazon Bedrock模型列表(Model IDs):
- anthropic.claude-instant-v1
- anthropic.claude-v2:1
- anthropic.claude-v2
- anthropic.claude-3-sonnet-20240229-v1:0
- anthropic.claude-3-haiku-20240307-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-7b-instruct-v0:2
- mistral.mixtral-8x7b-instruct-v0:1
> Note: 默认模型为 `anthropic.claude-3-sonnet-20240229-v1:0` 可以通过更改Lambda环境变量进行更改。
## 使用指南
### 前提条件
请确保您已满足以下先决条件:
- 可以访问Amazon Bedrock基础模型。
如果您还没有获得模型访问权限,请参考[配置](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html)指南。
### 架构图
下图展示了本方案的架构。请注意,它还包括一个新的**VPC**,其中只有两个公共子网用于应用程序负载均衡器(ALB)。
![Architecture](assets/arch.svg)
### 部署
请按以下步骤将Bedrock代理API部署到您的AWS账户中。仅支持Amazon Bedrock可用的区域(如us-west-2)。
**第一步: 自定义您的API Key (可选)**
> 注意:这一步是使用任意字符串不带空格创建一个自定义的API Key(凭证),将用于后续访问代理API。此API Key不必与您实际的OpenAI
> Key一致,您甚至无需拥有OpenAI API Key。建议您执行此步操作并且请确保保管好此API Key。
1. 打开AWS管理控制台,导航到Systems Manager服务。
2. 在左侧导航窗格中,单击"参数存储"。
3. 单击"创建参数"按钮。
4. 在"创建参数"窗口中,选择以下选项:
- 名称:输入参数的描述性名称(例如"BedrockProxyAPIKey")。
- 描述:可选,为参数提供描述。
- 层级:选择**标准**。
- 类型:选择**SecureString**。
- 值: 随意字符串(不带空格)。
5. 单击"创建参数"。
6. 记录您使用的参数名称(例如"BedrockProxyAPIKey")。您将在下一步中需要它。
**第二步: 部署CloudFormation堆栈**
1. 登录AWS管理控制台,切换到要部署CloudFormation堆栈的区域。
2. 单击以下按钮在该区域启动CloudFormation堆栈。
[![Launch Stack](assets/launch-stack.png)](https://console.aws.amazon.com/cloudformation/home#/stacks/create/template?stackName=BedrockProxyAPI&templateURL=https://aws-gcr-solutions.s3.amazonaws.com/bedrock-proxy-api/latest/BedrockProxy.template)
3. 单击"下一步"。
4. 在"指定堆栈详细信息"页面,提供以下信息:
- 堆栈名称: 可以根据需要更改名称。
- ApiKeyParam(如果在步骤1中设置了API密钥):输入您用于存储API密钥的参数名称(例如"BedrockProxyAPIKey")
。如果您没有设置API密钥,请将此字段留空。
单击"下一步"。
5. 在"配置堆栈选项"页面,您可以保留默认设置或根据需要进行自定义。
6. 单击"下一步"。
7. 在"审核"页面,查看您即将创建的堆栈详细信息。勾选底部的"我确认AWS CloudFormation 可能创建 IAM 资源。"复选框。
8. 单击"创建堆栈"。
仅此而已。部署完成后,点击CloudFormation堆栈,进入"输出"选项卡,你可以从"APIBaseUrl"
中找到API Base URL,它应该类似于`http://xxxx.xxx.elb.amazonaws.com/api/v1` 这样的格式。
### SDK/API使用
你只需要API Key和API Base URL。如果你没有设置自己的密钥,那么默认将使用API Key `bedrock`
现在,你可以尝试使用代理API了。假设你想测试Claude 3 Sonnet模型,那么使用"anthropic.claude-3-sonnet-20240229-v1:0"作为模型ID。
- **API 使用示例**
```bash
curl https://<API base url>/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer <API Key>" \
-d '{
"model": "anthropic.claude-3-sonnet-20240229-v1:0",
"messages": [
{
"role": "user",
"content": "Hello!"
}
]
}'
```
- **SDK 使用示例**
```bash
export OPENAI_API_KEY=<API key>
export OPENAI_API_BASE=<API base url>
```
```python
from openai import OpenAI
client = OpenAI()
completion = client.chat.completions.create(
model="anthropic.claude-3-sonnet-20240229-v1:0",
messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)
```
## 其他例子
### AutoGen
例如在AutoGen studio配置和使用模型
![AutoGen Model](assets/autogen-model.png)
### LangChain
请确保使用的是`ChatOpenAI(...)`,而不是`OpenAI(...)`
```python
# pip install langchain-openai
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
chat = ChatOpenAI(
model="anthropic.claude-3-sonnet-20240229-v1:0",
temperature=0,
openai_api_key="xxxx",
openai_api_base="http://xxx.elb.amazonaws.com/api/v1",
)
template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)
llm_chain = LLMChain(prompt=prompt, llm=chat)
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
response = llm_chain.invoke(question)
print(response)
```
## FAQs
### 关于隐私
这个方案不会收集您的任何数据。而且,它默认情况下也不会记录任何请求或响应。
### 为什么没有使用API Gateway 而是使用了Application Load Balancer?
简单的答案是API Gateway不支持 server-sent events (SSE) 用于流式响应。
### 支持哪些区域?
只支持Amazon Bedrock可用的区域,即:
- 美国东部(弗吉尼亚北部)
- 美国西部(俄勒冈州)
- 亚太地区(新加坡)
- 亚太地区(东京)
- 欧洲(法兰克福)
注意,并非所有模型都在上述区域可用。
### 我可以构建并使用自己的ECR镜像吗?
是的,你可以克隆repo并自行构建容器镜像(src/Dockerfile),然后推送到你自己的ECR仓库。
在部署之前,请在CloudFormation模板中替换镜像仓库URL。
### 我可以在本地运行吗?
是的,你可以在本地运行,那么API Base URL应该类似于`http://localhost:8000/api/v1`
### 使用代理API会有任何性能牺牲或延迟增加吗?
这还有待测试。但你应该只将此解决方案用于概念验证。
### 有计划支持SageMaker模型吗?
目前没有支持SageMaker模型的计划。这取决于是否有客户需求。
### 有计划支持Bedrock自定义模型吗?
不支持微调模型和设置了已预配吞吐量的模型。如有需要,你可以克隆repo并进行自定义。
### 如何升级?
如果架构没有变化,你可以简单地将最新镜像部署到Lambda中,以使用新功能(手动),而无需重新部署整个CloudFormation堆栈。
## 安全
更多信息,请参阅[CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications)。
## 许可证
本项目根据MIT-0许可证获得许可。请参阅LICENSE文件。

4
assets/arch.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 25 KiB

BIN
assets/autogen-agent.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 209 KiB

BIN
assets/autogen-model.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 212 KiB

BIN
assets/launch-stack.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.3 KiB

View File

@@ -0,0 +1,805 @@
{
"Parameters": {
"ApiKeyParam": {
"Type": "String",
"Default": "",
"Description": "The parameter name in System Manager used to store the API Key, leave blank to use a default key"
}
},
"Resources": {
"VPCB9E5F0B4": {
"Type": "AWS::EC2::VPC",
"Properties": {
"CidrBlock": "10.250.0.0/16",
"EnableDnsHostnames": true,
"EnableDnsSupport": true,
"InstanceTenancy": "default",
"Tags": [
{
"Key": "Name",
"Value": "BedrockProxy/VPC"
}
]
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/Resource"
}
},
"VPCPublicSubnet1SubnetB4246D30": {
"Type": "AWS::EC2::Subnet",
"Properties": {
"AvailabilityZone": {
"Fn::Select": [
0,
{
"Fn::GetAZs": ""
}
]
},
"CidrBlock": "10.250.0.0/24",
"MapPublicIpOnLaunch": true,
"Tags": [
{
"Key": "aws-cdk:subnet-name",
"Value": "Public"
},
{
"Key": "aws-cdk:subnet-type",
"Value": "Public"
},
{
"Key": "Name",
"Value": "BedrockProxy/VPC/PublicSubnet1"
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet1/Subnet"
}
},
"VPCPublicSubnet1RouteTableFEE4B781": {
"Type": "AWS::EC2::RouteTable",
"Properties": {
"Tags": [
{
"Key": "Name",
"Value": "BedrockProxy/VPC/PublicSubnet1"
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet1/RouteTable"
}
},
"VPCPublicSubnet1RouteTableAssociation0B0896DC": {
"Type": "AWS::EC2::SubnetRouteTableAssociation",
"Properties": {
"RouteTableId": {
"Ref": "VPCPublicSubnet1RouteTableFEE4B781"
},
"SubnetId": {
"Ref": "VPCPublicSubnet1SubnetB4246D30"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet1/RouteTableAssociation"
}
},
"VPCPublicSubnet1DefaultRoute91CEF279": {
"Type": "AWS::EC2::Route",
"Properties": {
"DestinationCidrBlock": "0.0.0.0/0",
"GatewayId": {
"Ref": "VPCIGWB7E252D3"
},
"RouteTableId": {
"Ref": "VPCPublicSubnet1RouteTableFEE4B781"
}
},
"DependsOn": [
"VPCVPCGW99B986DC"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet1/DefaultRoute"
}
},
"VPCPublicSubnet2Subnet74179F39": {
"Type": "AWS::EC2::Subnet",
"Properties": {
"AvailabilityZone": {
"Fn::Select": [
1,
{
"Fn::GetAZs": ""
}
]
},
"CidrBlock": "10.250.1.0/24",
"MapPublicIpOnLaunch": true,
"Tags": [
{
"Key": "aws-cdk:subnet-name",
"Value": "Public"
},
{
"Key": "aws-cdk:subnet-type",
"Value": "Public"
},
{
"Key": "Name",
"Value": "BedrockProxy/VPC/PublicSubnet2"
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet2/Subnet"
}
},
"VPCPublicSubnet2RouteTable6F1A15F1": {
"Type": "AWS::EC2::RouteTable",
"Properties": {
"Tags": [
{
"Key": "Name",
"Value": "BedrockProxy/VPC/PublicSubnet2"
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet2/RouteTable"
}
},
"VPCPublicSubnet2RouteTableAssociation5A808732": {
"Type": "AWS::EC2::SubnetRouteTableAssociation",
"Properties": {
"RouteTableId": {
"Ref": "VPCPublicSubnet2RouteTable6F1A15F1"
},
"SubnetId": {
"Ref": "VPCPublicSubnet2Subnet74179F39"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet2/RouteTableAssociation"
}
},
"VPCPublicSubnet2DefaultRouteB7481BBA": {
"Type": "AWS::EC2::Route",
"Properties": {
"DestinationCidrBlock": "0.0.0.0/0",
"GatewayId": {
"Ref": "VPCIGWB7E252D3"
},
"RouteTableId": {
"Ref": "VPCPublicSubnet2RouteTable6F1A15F1"
}
},
"DependsOn": [
"VPCVPCGW99B986DC"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/PublicSubnet2/DefaultRoute"
}
},
"VPCIGWB7E252D3": {
"Type": "AWS::EC2::InternetGateway",
"Properties": {
"Tags": [
{
"Key": "Name",
"Value": "BedrockProxy/VPC"
}
]
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/IGW"
}
},
"VPCVPCGW99B986DC": {
"Type": "AWS::EC2::VPCGatewayAttachment",
"Properties": {
"InternetGatewayId": {
"Ref": "VPCIGWB7E252D3"
},
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/VPC/VPCGW"
}
},
"ProxyApiHandlerServiceRoleBE71BFB1": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Statement": [
{
"Action": "sts:AssumeRole",
"Effect": "Allow",
"Principal": {
"Service": "lambda.amazonaws.com"
}
}
],
"Version": "2012-10-17"
},
"ManagedPolicyArns": [
{
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
]
]
}
]
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ApiHandler/ServiceRole/Resource"
}
},
"ProxyApiHandlerServiceRoleDefaultPolicy86681202": {
"Type": "AWS::IAM::Policy",
"Properties": {
"PolicyDocument": {
"Statement": [
{
"Action": [
"bedrock:InvokeModel",
"bedrock:InvokeModelWithResponseStream"
],
"Effect": "Allow",
"Resource": "arn:aws:bedrock:*::foundation-model/*"
},
{
"Action": [
"ssm:DescribeParameters",
"ssm:GetParameters",
"ssm:GetParameter",
"ssm:GetParameterHistory"
],
"Effect": "Allow",
"Resource": {
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":ssm:",
{
"Ref": "AWS::Region"
},
":",
{
"Ref": "AWS::AccountId"
},
":parameter/",
{
"Ref": "ApiKeyParam"
}
]
]
}
}
],
"Version": "2012-10-17"
},
"PolicyName": "ProxyApiHandlerServiceRoleDefaultPolicy86681202",
"Roles": [
{
"Ref": "ProxyApiHandlerServiceRoleBE71BFB1"
}
]
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ApiHandler/ServiceRole/DefaultPolicy/Resource"
}
},
"ProxyApiHandlerEC15A492": {
"Type": "AWS::Lambda::Function",
"Properties": {
"Architectures": [
"arm64"
],
"Code": {
"ImageUri": {
"Fn::Join": [
"",
[
{
"Fn::Select": [
4,
{
"Fn::Split": [
":",
{
"Fn::FindInMap": [
"ProxyRegionTable03E5BEB3",
{
"Ref": "AWS::Region"
},
"repoArn"
]
}
]
}
]
},
".dkr.ecr.",
{
"Fn::Select": [
3,
{
"Fn::Split": [
":",
{
"Fn::FindInMap": [
"ProxyRegionTable03E5BEB3",
{
"Ref": "AWS::Region"
},
"repoArn"
]
}
]
}
]
},
".",
{
"Ref": "AWS::URLSuffix"
},
"/bedrock-proxy-api:latest"
]
]
}
},
"Description": "Bedrock Proxy API Handler",
"Environment": {
"Variables": {
"API_KEY_PARAM_NAME": {
"Ref": "ApiKeyParam"
},
"DEBUG": "false",
"DEFAULT_MODEL": {
"Fn::FindInMap": [
"ProxyRegionTable03E5BEB3",
{
"Ref": "AWS::Region"
},
"model"
]
}
}
},
"MemorySize": 1024,
"PackageType": "Image",
"Role": {
"Fn::GetAtt": [
"ProxyApiHandlerServiceRoleBE71BFB1",
"Arn"
]
},
"Timeout": 300
},
"DependsOn": [
"ProxyApiHandlerServiceRoleDefaultPolicy86681202",
"ProxyApiHandlerServiceRoleBE71BFB1"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ApiHandler/Resource"
}
},
"ProxyApiHandlerInvoke2UTWxhlfyqbT5FTn5jvgbLgjFfJwzswGk55DU1HYF6C33779": {
"Type": "AWS::Lambda::Permission",
"Properties": {
"Action": "lambda:InvokeFunction",
"FunctionName": {
"Fn::GetAtt": [
"ProxyApiHandlerEC15A492",
"Arn"
]
},
"Principal": "elasticloadbalancing.amazonaws.com"
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ApiHandler/Invoke2UTWxhlfyqbT5FTn--5jvgbLgj+FfJwzswGk55DU1H--Y="
}
},
"ProxyALB87756780": {
"Type": "AWS::ElasticLoadBalancingV2::LoadBalancer",
"Properties": {
"LoadBalancerAttributes": [
{
"Key": "deletion_protection.enabled",
"Value": "false"
}
],
"Scheme": "internet-facing",
"SecurityGroups": [
{
"Fn::GetAtt": [
"ProxyALBSecurityGroup0D6CA3DA",
"GroupId"
]
}
],
"Subnets": [
{
"Ref": "VPCPublicSubnet1SubnetB4246D30"
},
{
"Ref": "VPCPublicSubnet2Subnet74179F39"
}
],
"Type": "application"
},
"DependsOn": [
"VPCPublicSubnet1DefaultRoute91CEF279",
"VPCPublicSubnet1RouteTableAssociation0B0896DC",
"VPCPublicSubnet2DefaultRouteB7481BBA",
"VPCPublicSubnet2RouteTableAssociation5A808732"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ALB/Resource"
}
},
"ProxyALBSecurityGroup0D6CA3DA": {
"Type": "AWS::EC2::SecurityGroup",
"Properties": {
"GroupDescription": "Automatically created Security Group for ELB BedrockProxyALB1CE4CAD1",
"SecurityGroupEgress": [
{
"CidrIp": "255.255.255.255/32",
"Description": "Disallow all traffic",
"FromPort": 252,
"IpProtocol": "icmp",
"ToPort": 86
}
],
"SecurityGroupIngress": [
{
"CidrIp": "0.0.0.0/0",
"Description": "Allow from anyone on port 80",
"FromPort": 80,
"IpProtocol": "tcp",
"ToPort": 80
}
],
"VpcId": {
"Ref": "VPCB9E5F0B4"
}
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ALB/SecurityGroup/Resource"
}
},
"ProxyALBListener933E9515": {
"Type": "AWS::ElasticLoadBalancingV2::Listener",
"Properties": {
"DefaultActions": [
{
"TargetGroupArn": {
"Ref": "ProxyALBListenerTargetsGroup187739FA"
},
"Type": "forward"
}
],
"LoadBalancerArn": {
"Ref": "ProxyALB87756780"
},
"Port": 80,
"Protocol": "HTTP"
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ALB/Listener/Resource"
}
},
"ProxyALBListenerTargetsGroup187739FA": {
"Type": "AWS::ElasticLoadBalancingV2::TargetGroup",
"Properties": {
"HealthCheckEnabled": false,
"TargetType": "lambda",
"Targets": [
{
"Id": {
"Fn::GetAtt": [
"ProxyApiHandlerEC15A492",
"Arn"
]
}
}
]
},
"DependsOn": [
"ProxyApiHandlerInvoke2UTWxhlfyqbT5FTn5jvgbLgjFfJwzswGk55DU1HYF6C33779"
],
"Metadata": {
"aws:cdk:path": "BedrockProxy/Proxy/ALB/Listener/TargetsGroup/Resource"
}
},
"CDKMetadata": {
"Type": "AWS::CDK::Metadata",
"Properties": {
"Analytics": "v2:deflate64:H4sIAAAAAAAA/1VRXW/CMAz8LbyHDMovAKZNSJtWFcTr5LpeZ0iTKHFAqOp/n1q+uief7y7ynZLp+WKhZxM4xylWx6nhUrdbATyq9Y/NIUBDQkHBOX63hJlu9x57aZ+vVZ5Kw7hNpSXpuScqXBLaQWnoyT+5ZYwOGYSdfZh7sLFCwZK8g9AZLrczt20pAvjbkBW1JUyB5fIeXPLDgTHRKcKgC/IusrhwWUEkZaApK9Dtq8MjhU0DNb0li/cIY5xTaDhGdrZTDI1uC3etMczcGcYh2hV1igxEYTQOqhIMWGRbnzLdLr03jEPLDwfVatAo9E//7WMfRyF789zxSN9BqEketUdr16mCoksBh6if4D3buodfSXy6fsrIsHa2Yhk6WleRPsSXUzbT87meTQ6ReRqSFW5IF9f5B/Z2H8goAgAA"
},
"Metadata": {
"aws:cdk:path": "BedrockProxy/CDKMetadata/Default"
},
"Condition": "CDKMetadataAvailable"
}
},
"Mappings": {
"ProxyRegionTable03E5BEB3": {
"us-east-1": {
"repoArn": "arn:aws:ecr:us-east-1:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-3-sonnet-20240229-v1:0"
},
"us-west-2": {
"repoArn": "arn:aws:ecr:us-west-2:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-3-sonnet-20240229-v1:0"
},
"ap-southeast-1": {
"repoArn": "arn:aws:ecr:ap-southeast-1:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-v2"
},
"ap-northeast-1": {
"repoArn": "arn:aws:ecr:ap-northeast-1:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-v2:1"
},
"eu-central-1": {
"repoArn": "arn:aws:ecr:eu-central-1:366590864501:repository/bedrock-proxy-api",
"model": "anthropic.claude-v2:1"
}
}
},
"Outputs": {
"APIBaseUrl": {
"Description": "Proxy API Base URL (OPENAI_API_BASE)",
"Value": {
"Fn::Join": [
"",
[
"http://",
{
"Fn::GetAtt": [
"ProxyALB87756780",
"DNSName"
]
},
"/api/v1"
]
]
}
}
},
"Conditions": {
"CDKMetadataAvailable": {
"Fn::Or": [
{
"Fn::Or": [
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"af-south-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-east-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-northeast-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-northeast-2"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-south-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-southeast-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ap-southeast-2"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"ca-central-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"cn-north-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"cn-northwest-1"
]
}
]
},
{
"Fn::Or": [
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-central-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-north-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-south-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-west-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-west-2"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"eu-west-3"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"il-central-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"me-central-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"me-south-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"sa-east-1"
]
}
]
},
{
"Fn::Or": [
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"us-east-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"us-east-2"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"us-west-1"
]
},
{
"Fn::Equals": [
{
"Ref": "AWS::Region"
},
"us-west-2"
]
}
]
}
]
}
}
}

9
src/Dockerfile Normal file
View File

@@ -0,0 +1,9 @@
# AWS Lambda base image with the Python 3.12 runtime.
FROM public.ecr.aws/lambda/python:3.12

# Application package ends up under the Lambda task root as ./api.
COPY ./api ./api
COPY requirements.txt .

# -U upgrades any pre-installed packages; --no-cache-dir keeps the image small.
RUN pip3 install -r requirements.txt -U --no-cache-dir

# Lambda handler: attribute "handler" in module api.app (the Mangum adapter).
CMD [ "api.app.handler" ]

0
src/api/__init__.py Normal file
View File

52
src/api/app.py Normal file
View File

@@ -0,0 +1,52 @@
import logging
import uvicorn
from fastapi import FastAPI
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import PlainTextResponse
from mangum import Mangum
from api.routers import model, chat
from api.setting import API_ROUTE_PREFIX, TITLE, DESCRIPTION, SUMMARY, VERSION
# FastAPI application metadata, pulled from central settings so the
# generated OpenAPI docs stay consistent with the deployment.
config = {
    "title": TITLE,
    "description": DESCRIPTION,
    "summary": SUMMARY,
    "version": VERSION,
}

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)

app = FastAPI(**config)

# Fully-open CORS so browser-based OpenAI SDK clients can call the proxy
# from any origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# All API routes are mounted under the shared prefix (e.g. /api/v1).
app.include_router(model.router, prefix=API_ROUTE_PREFIX)
app.include_router(chat.router, prefix=API_ROUTE_PREFIX)


@app.get("/health")
async def health():
    """For health check if needed"""
    return {"status": "OK"}


@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request, exc):
    # Surface request-validation failures as a plain-text 400 instead of
    # FastAPI's default 422 JSON body.
    return PlainTextResponse(str(exc), status_code=400)


# Lambda entry point: Mangum adapts ALB/API Gateway events to ASGI.
handler = Mangum(app)

if __name__ == "__main__":
    # Local development server (API base URL http://localhost:8000/api/v1).
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)

28
src/api/auth.py Normal file
View File

@@ -0,0 +1,28 @@
import os
from typing import Annotated
import boto3
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from api.setting import DEFAULT_API_KEYS
# Name of the SSM parameter that stores the API key; when unset, the
# built-in default key from settings is used instead.
api_key_param = os.environ.get("API_KEY_PARAM_NAME")
if api_key_param:
    # Resolve the key once at import time from SSM Parameter Store
    # (stored as SecureString, hence WithDecryption=True).
    ssm = boto3.client("ssm")
    api_key = ssm.get_parameter(Name=api_key_param, WithDecryption=True)["Parameter"][
        "Value"
    ]
else:
    api_key = DEFAULT_API_KEYS
security = HTTPBearer()
def api_key_auth(
    credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)]
):
    """Reject the request with 401 unless the bearer token matches the configured key."""
    supplied = credentials.credentials
    if supplied == api_key:
        return
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key"
    )

View File

@@ -0,0 +1 @@
from api.models.bedrock import ClaudeModel, SUPPORTED_BEDROCK_MODELS, get_model

391
src/api/models/bedrock.py Normal file
View File

@@ -0,0 +1,391 @@
import json
import logging
import uuid
from abc import ABC, abstractmethod
from typing import AsyncIterable
import boto3
from api.schema import (
ChatResponse,
ChatRequest,
ChatRequestMessage,
Choice,
ChatResponseMessage,
Usage,
ChatStreamResponse,
ChoiceDelta,
)
from api.setting import DEBUG, AWS_REGION
logger = logging.getLogger(__name__)
# Module-level Bedrock runtime client shared by all model adapters.
bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    region_name=AWS_REGION,
)
# Maps Bedrock model ids to human-readable family names; get_model() selects
# the adapter class by family name, and the /models endpoints expose the keys.
SUPPORTED_BEDROCK_MODELS = {
    "anthropic.claude-instant-v1": "Claude Instant",
    "anthropic.claude-v2:1": "Claude",
    "anthropic.claude-v2": "Claude",
    "anthropic.claude-3-sonnet-20240229-v1:0": "Claude 3 Sonnet",
    "anthropic.claude-3-haiku-20240307-v1:0": "Claude 3 Haiku",
    "meta.llama2-13b-chat-v1": "Llama 2 Chat 13B",
    "meta.llama2-70b-chat-v1": "Llama 2 Chat 70B",
    "mistral.mistral-7b-instruct-v0:2": "Mistral 7B Instruct",
    "mistral.mixtral-8x7b-instruct-v0:1": "Mixtral 8x7B Instruct",
}
class BaseChatModel(ABC):
    """Represent a basic chat model

    Currently, only Bedrock model is supported, but may be used for SageMaker models if needed.
    """

    @abstractmethod
    def chat(self, chat_request: ChatRequest) -> ChatResponse:
        """Handle a basic chat completion requests."""

    @abstractmethod
    def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
        """Handle a basic chat completion requests with stream response."""

    def _generate_message_id(self) -> str:
        # OpenAI-style short completion id, e.g. "chatcmpl-1a2b3c4d".
        return f"chatcmpl-{uuid.uuid4().hex[:8]}"

    def _stream_response_to_bytes(self, response: ChatStreamResponse) -> bytes:
        # Encode one chunk as a server-sent-events data frame.
        return f"data: {response.model_dump_json()}\n\n".encode("utf-8")
# https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html
class BedrockModel(BaseChatModel):
    """Shared plumbing for Bedrock-hosted chat models.

    Subclasses translate between the OpenAI-style schema and each model
    family's native request/response body.
    """

    accept = "application/json"
    content_type = "application/json"

    def _invoke_model(self, args: dict, model_id: str, with_stream: bool = False):
        """Invoke the Bedrock runtime with the given body, streaming when requested."""
        payload = json.dumps(args)
        if DEBUG:
            logger.info("Invoke Bedrock Model: " + model_id)
            logger.info("Bedrock request body: " + payload)
        invoke = (
            bedrock_runtime.invoke_model_with_response_stream
            if with_stream
            else bedrock_runtime.invoke_model
        )
        return invoke(
            body=payload,
            modelId=model_id,
            accept=self.accept,
            contentType=self.content_type,
        )

    def _create_response(
        self,
        model: str,
        message: str,
        message_id: str,
        input_tokens: int = 0,
        output_tokens: int = 0,
    ) -> ChatResponse:
        """Wrap a complete assistant message in an OpenAI-style ChatResponse."""
        response = ChatResponse(
            id=message_id,
            model=model,
            choices=[
                Choice(
                    index=0,
                    message=ChatResponseMessage(role="assistant", content=message),
                    finish_reason="stop",
                )
            ],
            usage=Usage(
                prompt_tokens=input_tokens,
                completion_tokens=output_tokens,
                total_tokens=input_tokens + output_tokens,
            ),
        )
        if DEBUG:
            logger.info("Proxy response :" + response.model_dump_json())
        return response

    def _create_response_stream(
        self, model: str, message_id: str, chunk_message: str, finish_reason: str | None
    ) -> ChatStreamResponse:
        """Wrap one streamed text delta in an OpenAI-style chunk response."""
        response = ChatStreamResponse(
            id=message_id,
            model=model,
            choices=[
                ChoiceDelta(
                    index=0,
                    delta=ChatResponseMessage(role="assistant", content=chunk_message),
                    finish_reason=finish_reason,
                )
            ],
        )
        if DEBUG:
            logger.info("Proxy response :" + response.model_dump_json())
        return response
def get_model(model_id: str) -> BedrockModel:
    """Resolve a Bedrock model id to the chat-model adapter for its family.

    Raises ValueError for ids absent from SUPPORTED_BEDROCK_MODELS.
    """
    model_name = SUPPORTED_BEDROCK_MODELS.get(model_id, "")
    if DEBUG:
        logger.info("model name is " + model_name)
    # Dispatch table keyed by family name instead of chained membership tests.
    adapter_by_family = {
        "Claude Instant": ClaudeModel,
        "Claude": ClaudeModel,
        "Claude 3 Sonnet": ClaudeModel,
        "Claude 3 Haiku": ClaudeModel,
        "Llama 2 Chat 13B": Llama2Model,
        "Llama 2 Chat 70B": Llama2Model,
        "Mistral 7B Instruct": MistralModel,
        "Mixtral 8x7B Instruct": MistralModel,
    }
    adapter_cls = adapter_by_family.get(model_name)
    if adapter_cls is None:
        logger.error("Unsupported model id " + model_id)
        raise ValueError("Invalid model ID")
    return adapter_cls()
class ClaudeModel(BedrockModel):
    """Chat model adapter for Anthropic Claude models on Bedrock (messages API)."""

    anthropic_version = "bedrock-2023-05-31"

    def _parse_args(self, chat_request: ChatRequest) -> dict:
        """Build the Anthropic messages-API request body from an OpenAI-style request."""
        messages = chat_request.messages
        args = {
            "anthropic_version": self.anthropic_version,
            "max_tokens": chat_request.max_tokens,
            "top_p": chat_request.top_p,
            "temperature": chat_request.temperature,
        }
        # Claude takes the system prompt as a dedicated top-level field,
        # not as an entry in the conversation list.
        if messages[0].role == "system":
            args["system"] = messages[0].content
            conversation = messages[1:]
        else:
            conversation = messages
        args["messages"] = [
            {"role": m.role, "content": m.content} for m in conversation
        ]
        return args

    def chat(self, chat_request: ChatRequest) -> ChatResponse:
        """Run a non-streaming completion and convert the reply to OpenAI format."""
        invoke_result = self._invoke_model(
            args=self._parse_args(chat_request), model_id=chat_request.model
        )
        body = json.loads(invoke_result.get("body").read())
        if DEBUG:
            logger.info("Bedrock response body: " + str(body))
        return self._create_response(
            model=chat_request.model,
            message=body["content"][0]["text"],
            message_id=body["id"],
            input_tokens=body["usage"]["input_tokens"],
            output_tokens=body["usage"]["output_tokens"],
        )

    def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
        """Yield SSE-encoded chunks converted from Claude's streaming events."""
        invoke_result = self._invoke_model(
            args=self._parse_args(chat_request),
            model_id=chat_request.model,
            with_stream=True,
        )
        message_id = ""
        for event in invoke_result.get("body"):
            if DEBUG:
                logger.info("Bedrock response chunk: " + str(event))
            chunk = json.loads(event["chunk"]["bytes"])
            chunk_type = chunk["type"]
            if chunk_type == "message_start":
                # Remember Claude's message id so every chunk carries it.
                message_id = chunk["message"]["id"]
                continue
            if chunk_type == "content_block_delta":
                text, finish = chunk["delta"]["text"], None
            elif chunk_type == "message_delta":
                # Final delta carries no text; signal normal completion.
                text, finish = "", "stop"
            else:
                # Ignore bookkeeping events (content_block_start/stop, etc.).
                continue
            yield self._stream_response_to_bytes(
                self._create_response_stream(
                    model=chat_request.model,
                    message_id=message_id,
                    chunk_message=text,
                    finish_reason=finish,
                )
            )
class Llama2Model(BedrockModel):
    """Chat model adapter for Meta Llama 2 chat models on Bedrock."""

    def _convert_prompt(self, messages: list[ChatRequestMessage]) -> str:
        """Create a prompt message follow below example:

        <s>[INST] <<SYS>>\n{your_system_message}\n<</SYS>>\n\n{user_message_1} [/INST] {model_reply_1}</s>
        <s>[INST] {user_message_2} [/INST]
        """
        if DEBUG:
            logger.info("Convert below messages to prompt for Llama 2: ")
            for msg in messages:
                logger.info(msg.model_dump_json())
        bos_token = "<s>"
        eos_token = "</s>"
        prompt = bos_token + "[INST] "
        start = 0
        end_turn = False
        if messages[0].role == "system":
            # Fix: the system block must be closed with <</SYS>>; the original
            # emitted a second opening <<SYS>> tag, producing a malformed prompt.
            prompt += "<<SYS>>\n" + messages[0].content + "\n<</SYS>>\n\n"
            start = 1
        # TODO: Add validation (e.g. strictly alternating user/assistant turns)
        for msg in messages[start:]:
            if msg.role == "user":
                if end_turn:
                    # A new user turn after an assistant reply opens a new [INST] block.
                    prompt += bos_token + "[INST] "
                prompt += msg.content + " [/INST] "
                end_turn = False
            else:
                prompt += msg.content + eos_token
                end_turn = True
        if DEBUG:
            logger.info("Converted prompt: " + prompt.replace("\n", "\\n"))
        return prompt

    def _parse_args(self, chat_request: ChatRequest) -> dict:
        """Build the native Llama 2 request body."""
        prompt = self._convert_prompt(chat_request.messages)
        return {
            "prompt": prompt,
            "max_gen_len": chat_request.max_tokens,
            "temperature": chat_request.temperature,
            "top_p": chat_request.top_p,
        }

    def chat(self, chat_request: ChatRequest) -> ChatResponse:
        """Run a non-streaming completion and convert the reply to OpenAI format."""
        response = self._invoke_model(
            args=self._parse_args(chat_request), model_id=chat_request.model
        )
        response_body = json.loads(response.get("body").read())
        if DEBUG:
            logger.info("Bedrock response body: " + str(response_body))
        # Llama 2 responses carry no message id, so mint an OpenAI-style one.
        message_id = self._generate_message_id()
        return self._create_response(
            model=chat_request.model,
            message=response_body["generation"],
            message_id=message_id,
            input_tokens=response_body["prompt_token_count"],
            output_tokens=response_body["generation_token_count"],
        )

    def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
        """Stream the completion as OpenAI-style SSE chunk responses."""
        response = self._invoke_model(
            args=self._parse_args(chat_request),
            model_id=chat_request.model,
            with_stream=True,
        )
        # Fix: mint one id up front and reuse it for every chunk; previously
        # msg_id stayed "" so all streamed chunks had an empty id.
        msg_id = self._generate_message_id()
        for event in response.get("body"):
            if DEBUG:
                logger.info("Bedrock response chunk: " + str(event))
            chunk = json.loads(event["chunk"]["bytes"])
            # stop_reason is null until the final chunk, matching OpenAI's
            # finish_reason semantics.
            stream_response = self._create_response_stream(
                model=chat_request.model,
                message_id=msg_id,
                chunk_message=chunk["generation"],
                finish_reason=chunk["stop_reason"],
            )
            yield self._stream_response_to_bytes(stream_response)
class MistralModel(BedrockModel):
    """Chat model adapter for Mistral/Mixtral instruct models on Bedrock."""

    def _convert_prompt(self, messages: list[ChatRequestMessage]) -> str:
        """Create a prompt message follow below example:

        <s>[INST] {your_system_message}\n{user_message_1} [/INST] {model_reply_1}</s>
        <s>[INST] {user_message_2} [/INST]
        """
        if DEBUG:
            # Fix: this adapter handles Mistral; the log previously said "Llama 2".
            logger.info("Convert below messages to prompt for Mistral: ")
            for msg in messages:
                logger.info(msg.model_dump_json())
        bos_token = "<s>"
        eos_token = "</s>"
        prompt = bos_token + "[INST] "
        start = 0
        end_turn = False
        # Mistral has no dedicated system slot; prepend it to the first turn.
        if messages[0].role == "system":
            prompt += messages[0].content + "\n"
            start = 1
        # TODO: Add validation (e.g. strictly alternating user/assistant turns)
        for msg in messages[start:]:
            if msg.role == "user":
                if end_turn:
                    # A new user turn after an assistant reply opens a new [INST] block.
                    prompt += bos_token + "[INST] "
                prompt += msg.content + " [/INST] "
                end_turn = False
            else:
                prompt += msg.content + eos_token
                end_turn = True
        if DEBUG:
            logger.info("Converted prompt: " + prompt.replace("\n", "\\n"))
        return prompt

    def _parse_args(self, chat_request: ChatRequest) -> dict:
        """Build the native Mistral request body."""
        prompt = self._convert_prompt(chat_request.messages)
        return {
            "prompt": prompt,
            "max_tokens": chat_request.max_tokens,
            "temperature": chat_request.temperature,
            "top_p": chat_request.top_p,
        }

    def chat(self, chat_request: ChatRequest) -> ChatResponse:
        """Run a non-streaming completion and convert the reply to OpenAI format."""
        response = self._invoke_model(
            args=self._parse_args(chat_request), model_id=chat_request.model
        )
        response_body = json.loads(response.get("body").read())
        if DEBUG:
            logger.info("Bedrock response body: " + str(response_body))
        message_id = self._generate_message_id()
        # NOTE(review): the Mistral response body exposes no token counts here,
        # so usage falls back to zeros.
        return self._create_response(
            model=chat_request.model,
            message=response_body["outputs"][0]["text"],
            message_id=message_id,
        )

    def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
        """Stream the completion as OpenAI-style SSE chunk responses."""
        response = self._invoke_model(
            args=self._parse_args(chat_request),
            model_id=chat_request.model,
            with_stream=True,
        )
        # Fix: mint one id up front and reuse it for every chunk; previously
        # msg_id stayed "" so all streamed chunks had an empty id.
        msg_id = self._generate_message_id()
        for event in response.get("body"):
            if DEBUG:
                logger.info("Bedrock response chunk: " + str(event))
            chunk = json.loads(event["chunk"]["bytes"])
            stream_response = self._create_response_stream(
                model=chat_request.model,
                message_id=msg_id,
                chunk_message=chunk["outputs"][0]["text"],
                finish_reason=chunk["outputs"][0]["stop_reason"],
            )
            yield self._stream_response_to_bytes(stream_response)

View File

51
src/api/routers/chat.py Normal file
View File

@@ -0,0 +1,51 @@
from typing import Annotated
from fastapi import APIRouter, Depends, Body, HTTPException
from fastapi.responses import StreamingResponse
from api.auth import api_key_auth
from api.models import get_model, SUPPORTED_BEDROCK_MODELS
from api.schema import ChatRequest, ChatResponse, ChatStreamResponse
from api.setting import DEFAULT_MODEL
# Fix: removed a dead `router = APIRouter()` that was immediately overwritten.
# All chat endpoints require a valid API key via the router-level dependency.
router = APIRouter(
    prefix="/chat",
    tags=["items"],
    dependencies=[Depends(api_key_auth)],
    # responses={404: {"description": "Not found"}},
)
@router.post("/completions", response_model=ChatResponse | ChatStreamResponse)
async def chat_completions(
    chat_request: Annotated[
        ChatRequest,
        Body(
            examples=[
                {
                    "model": "anthropic.claude-3-sonnet-20240229-v1:0",
                    "messages": [
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": "Hello!"},
                    ],
                }
            ],
        ),
    ]
):
    """OpenAI-compatible chat completions endpoint backed by Bedrock models.

    Returns a full ChatResponse, or an SSE stream of chunks when
    `stream` is true. Raises 400 for unknown model ids.
    """
    # Clients hard-coded to OpenAI model names (gpt-*) are transparently
    # redirected to the configured default Bedrock model.
    if chat_request.model.lower().startswith("gpt-"):
        chat_request.model = DEFAULT_MODEL

    # Membership test on the dict itself (no need to materialize .keys()).
    if chat_request.model not in SUPPORTED_BEDROCK_MODELS:
        raise HTTPException(
            status_code=400, detail="Unsupported Model Id " + chat_request.model
        )
    try:
        model = get_model(chat_request.model)
        if chat_request.stream:
            return StreamingResponse(
                content=model.chat_stream(chat_request), media_type="text/event-stream"
            )
        return model.chat(chat_request)
    except ValueError as e:
        # Chain the cause so the original error survives in logs/tracebacks.
        raise HTTPException(status_code=400, detail=str(e)) from e

41
src/api/routers/model.py Normal file
View File

@@ -0,0 +1,41 @@
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Path
from api.auth import api_key_auth
from api.models import SUPPORTED_BEDROCK_MODELS
from api.schema import Models, Model
# Fix: removed a dead `router = APIRouter()` that was immediately overwritten.
# All /models endpoints require a valid API key via the router-level dependency.
router = APIRouter(
    prefix="/models",
    tags=["items"],
    dependencies=[Depends(api_key_auth)],
    # responses={404: {"description": "Not found"}},
)
async def validate_model_id(model_id: str):
    """Raise HTTP 400 if model_id is not a supported Bedrock model."""
    # Membership test on the dict itself (no need to materialize .keys()).
    if model_id not in SUPPORTED_BEDROCK_MODELS:
        raise HTTPException(status_code=400, detail="Unsupported Model Id")
@router.get("/", response_model=Models)
async def list_models():
    """Return every supported Bedrock model id as an OpenAI-style model list."""
    return Models(data=[Model(id=mid) for mid in SUPPORTED_BEDROCK_MODELS])
@router.get(
    "/{model_id}",
    response_model=Model,
)
async def get_model(
    model_id: Annotated[
        str,
        Path(description="Model ID", example="anthropic.claude-3-sonnet-20240229-v1:0"),
    ]
):
    """Return details for a single model, raising 400 for unknown ids."""
    await validate_model_id(model_id)
    return Model(id=model_id)

80
src/api/schema.py Normal file
View File

@@ -0,0 +1,80 @@
import time
from typing import Literal
from pydantic import BaseModel, Field
class Model(BaseModel):
    """One entry in the OpenAI-compatible model list."""

    id: str
    # Unix timestamp captured when the instance is created.
    created: int = Field(default_factory=lambda: int(time.time()))
    object: str | None = "model"
    owned_by: str | None = "bedrock"
class Models(BaseModel):
    """OpenAI-compatible list-models response envelope."""

    object: str | None = "list"
    # Pydantic deep-copies field defaults, so the mutable [] default is safe here.
    data: list[Model] = []
class ChatRequestMessage(BaseModel):
    """A single incoming conversation message (OpenAI chat format)."""

    name: str | None = None
    role: Literal["user", "assistant", "system"]
    content: str
class ChatRequest(BaseModel):
    """OpenAI-compatible chat completion request body.

    Fields marked "Not used" are accepted for client compatibility but are
    not forwarded to Bedrock.
    """

    messages: list[ChatRequestMessage]
    model: str
    frequency_penalty: float | None = Field(default=0.0, le=2.0, ge=-2.0)  # Not used
    presence_penalty: float | None = Field(default=0.0, le=2.0, ge=-2.0)  # Not used
    stream: bool | None = False
    temperature: float | None = Field(default=1.0, le=2.0, ge=0.0)
    top_p: float | None = Field(default=1.0, le=1.0, ge=0.0)
    user: str | None = None  # Not used
    max_tokens: int | None = 2048
    n: int | None = 1  # Not used
class Usage(BaseModel):
    """Token accounting, mirroring OpenAI's usage object."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
class ChatResponseMessage(BaseModel):
    """Assistant message returned in a choice or streamed delta."""

    # tool_calls
    role: Literal["assistant"] | None = None
    content: str | None = None
class BaseChoice(BaseModel):
    """Fields shared by streaming and non-streaming choices."""

    index: int
    # None while a stream is still producing output; e.g. "stop" when done.
    finish_reason: str | None
    logprobs: dict | None = None
class Choice(BaseChoice):
    """Non-streaming choice carrying the complete assistant message."""

    message: ChatResponseMessage
class ChoiceDelta(BaseChoice):
    """Streaming choice carrying one incremental message delta."""

    delta: ChatResponseMessage
class BaseChatResponse(BaseModel):
    """Fields shared by streaming and non-streaming chat responses."""

    # id: str = Field(default_factory=lambda: "chatcmpl-" + str(uuid.uuid4())[:8])
    id: str
    # Unix timestamp captured when the instance is created.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    # Static placeholder; OpenAI uses this field to identify the backend build.
    system_fingerprint: str = "fp_e97c09dd4e26"
class ChatResponse(BaseChatResponse):
    """Complete (non-streaming) chat completion response."""

    choices: list[Choice]
    object: Literal["chat.completion"] = "chat.completion"
    usage: Usage
class ChatStreamResponse(BaseChatResponse):
    """One chunk of a streaming chat completion response."""

    choices: list[ChoiceDelta]
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"

27
src/api/setting.py Normal file
View File

@@ -0,0 +1,27 @@
import os

# Fallback API key used when no SSM parameter name is configured (see api.auth).
DEFAULT_API_KEYS = "bedrock"
# Common prefix under which the model and chat routers are mounted.
API_ROUTE_PREFIX = "/api/v1"
# OpenAPI metadata shown in the generated docs pages.
TITLE = "Amazon Bedrock Proxy APIs"
SUMMARY = "OpenAI-Compatible RESTful APIs for Amazon Bedrock"
VERSION = "0.1.0"
DESCRIPTION = """
Use OpenAI-Compatible RESTful APIs for Amazon Bedrock models.
List of Amazon Bedrock models currently supported:
- anthropic.claude-instant-v1
- anthropic.claude-v2:1
- anthropic.claude-v2
- anthropic.claude-3-sonnet-20240229-v1:0
- anthropic.claude-3-haiku-20240307-v1:0
- meta.llama2-13b-chat-v1
- meta.llama2-70b-chat-v1
- mistral.mistral-7b-instruct-v0:2
- mistral.mixtral-8x7b-instruct-v0:1
"""
# NOTE(review): DEBUG is enabled for ANY value other than the literal "false"
# (e.g. DEBUG=0 still turns it on) — confirm this is intended.
DEBUG = os.environ.get("DEBUG", "false").lower() != "false"
AWS_REGION = os.environ.get("AWS_REGION", "us-west-2")
# Model substituted when a client requests an OpenAI "gpt-*" model name.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "anthropic.claude-3-sonnet-20240229-v1:0")

4
src/requirements.txt Normal file
View File

@@ -0,0 +1,4 @@
fastapi==0.103.0
pydantic==2.6.3
uvicorn==0.27.0.post1
mangum==0.17.0