From d4938a0af2f48389f4ec4af12139b988e2627e36 Mon Sep 17 00:00:00 2001
From: Aiden Dai
Date: Mon, 16 Dec 2024 16:01:19 +0800
Subject: [PATCH] Automatically detect model list

---
 src/api/models/bedrock.py | 425 ++++++++++----------------
 src/api/setting.py        |   8 +-
 src/requirements.txt      |   7 +-
 3 files changed, 108 insertions(+), 332 deletions(-)

diff --git a/src/api/models/bedrock.py b/src/api/models/bedrock.py
index f59856a..dff1c5b 100644
--- a/src/api/models/bedrock.py
+++ b/src/api/models/bedrock.py
@@ -7,10 +7,10 @@ from abc import ABC
 from typing import AsyncIterable, Iterable, Literal
 
 import boto3
-from botocore.config import Config
 import numpy as np
 import requests
 import tiktoken
+from botocore.config import Config
 from fastapi import HTTPException
 
 from api.models.base import BaseChatModel, BaseEmbeddingsModel
@@ -37,9 +37,8 @@ from api.schema import (
     EmbeddingsUsage,
     Embedding,
-
 )
-from api.setting import DEBUG, AWS_REGION
+from api.setting import DEBUG, AWS_REGION, ENABLE_CROSS_REGION_INFERENCE
 
 logger = logging.getLogger(__name__)
 
@@ -50,6 +49,21 @@ bedrock_runtime = boto3.client(
     region_name=AWS_REGION,
     config=config,
 )
+bedrock_client = boto3.client(
+    service_name='bedrock',
+    region_name=AWS_REGION,
+    config=config,
+)
+
+
+def get_inference_region_prefix():
+    if AWS_REGION.startswith('ap-'):
+        return 'apac'
+    return AWS_REGION[:2]
+
+
+# https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html
+cr_inference_prefix = get_inference_region_prefix()
 
 SUPPORTED_BEDROCK_EMBEDDING_MODELS = {
     "cohere.embed-multilingual-v3": "Cohere Embed Multilingual",
@@ -62,296 +76,78 @@ SUPPORTED_BEDROCK_EMBEDDING_MODELS = {
 
 ENCODER = tiktoken.get_encoding("cl100k_base")
 
+def list_bedrock_models() -> dict:
+    """Automatically get a list of supported models.
+
+    Returns a model list that combines:
+    - ON_DEMAND models.
+    - Cross-region inference profiles (if enabled via environment variable).
+    """
+    model_list = {}
+    try:
+        profile_list = []
+        if ENABLE_CROSS_REGION_INFERENCE:
+            # List system-defined inference profile IDs.
+            response = bedrock_client.list_inference_profiles(
+                maxResults=1000,
+                typeEquals='SYSTEM_DEFINED'
+            )
+            profile_list = [p['inferenceProfileId'] for p in response['inferenceProfileSummaries']]
+
+        # List foundation models; only text-output models are relevant here.
+        response = bedrock_client.list_foundation_models(
+            byOutputModality='TEXT'
+        )
+
+        for model in response['modelSummaries']:
+            model_id = model.get('modelId', 'N/A')
+            stream_supported = model.get('responseStreamingSupported', True)
+            status = model['modelLifecycle'].get('status', 'ACTIVE')
+
+            # Currently used to filter out rerank models and legacy models.
+            if not stream_supported or status != "ACTIVE":
+                continue
+
+            inference_types = model.get('inferenceTypesSupported', [])
+            input_modalities = model['inputModalities']
+            # Add to the on-demand model list.
+            if 'ON_DEMAND' in inference_types:
+                model_list[model_id] = {
+                    'modalities': input_modalities
+                }
+
+            # Add to the cross-region inference model list.
+            profile_id = cr_inference_prefix + '.' + model_id
+            if profile_id in profile_list:
+                model_list[profile_id] = {
+                    'modalities': input_modalities
+                }
+
+    except Exception as e:
+        logger.error(f"Unable to list models: {str(e)}")
+
+    return model_list
+
+
+# Initialize the model list.
+bedrock_model_list = list_bedrock_models() + + class BedrockModel(BaseChatModel): - # https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html#conversation-inference-supported-models-features - _supported_models = { - "amazon.titan-text-premier-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "anthropic.claude-instant-v1": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "anthropic.claude-v2:1": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "anthropic.claude-v2": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "anthropic.claude-3-sonnet-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "anthropic.claude-3-opus-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "anthropic.claude-3-haiku-20240307-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "anthropic.claude-3-5-sonnet-20240620-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "anthropic.claude-3-5-sonnet-20241022-v2:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "meta.llama2-13b-chat-v1": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "meta.llama2-70b-chat-v1": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "meta.llama3-8b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "meta.llama3-70b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - # Llama 3.1 8b cross-region inference profile - "us.meta.llama3-1-8b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "meta.llama3-1-8b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - # Llama 3.1 70b cross-region inference profile - "us.meta.llama3-1-70b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "meta.llama3-1-70b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "meta.llama3-1-405b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - # Llama 3.2 1B cross-region inference profile - "us.meta.llama3-2-1b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - # Llama 3.2 3B cross-region inference profile - "us.meta.llama3-2-3b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - # Llama 3.2 11B cross-region inference profile - "us.meta.llama3-2-11b-instruct-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": False, - }, - # Llama 3.2 90B cross-region inference profile - "us.meta.llama3-2-90b-instruct-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": False, - }, - "mistral.mistral-7b-instruct-v0:2": { - 
"system": False, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "mistral.mixtral-8x7b-instruct-v0:1": { - "system": False, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "mistral.mistral-small-2402-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "mistral.mistral-large-2402-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "mistral.mistral-large-2407-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "cohere.command-r-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "cohere.command-r-plus-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "apac.anthropic.claude-3-sonnet-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "apac.anthropic.claude-3-haiku-20240307-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "apac.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3 Haiku cross-region inference profile - "us.anthropic.claude-3-haiku-20240307-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "eu.anthropic.claude-3-haiku-20240307-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3 Opus cross-region inference profile - "us.anthropic.claude-3-opus-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3 Sonnet cross-region inference profile - "us.anthropic.claude-3-sonnet-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "eu.anthropic.claude-3-sonnet-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3.5 Sonnet cross-region inference profile - "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3.5 Sonnet v2 cross-region inference profile(Now only us-west-2) - "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # Amazon Nova models - AWS's proprietary large language models - "us.amazon.nova-lite-v1:0": { - "system": True, # Supports system prompts for context setting - "multimodal": True, # Capable of processing both text and images - "tool_call": True, - "stream_tool_call": True, - }, - "us.amazon.nova-micro-v1:0": { - "system": True, # Supports system prompts for context setting - "multimodal": False, # Text-only model, no image processing capabilities - "tool_call": True, - "stream_tool_call": True, - }, - "us.amazon.nova-pro-v1:0": { - "system": True, # Supports system prompts for context setting - "multimodal": True, # Capable of processing both text and images - "tool_call": True, - "stream_tool_call": True, - }, - } def list_models(self) -> list[str]: - return 
list(self._supported_models.keys())
+        """Refresh and return the latest model list."""
+        global bedrock_model_list
+        bedrock_model_list = list_bedrock_models()
+        return list(bedrock_model_list.keys())
 
     def validate(self, chat_request: ChatRequest):
         """Perform basic validation on requests"""
         error = ""
         # check if model is supported
-        if chat_request.model not in self._supported_models.keys():
+        if chat_request.model not in bedrock_model_list.keys():
             error = f"Unsupported model {chat_request.model}, please use models API to get a list of supported models"
 
-        # check if tool call is supported
-        elif chat_request.tools and not self._is_tool_call_supported(chat_request.model, stream=chat_request.stream):
-            tool_call_info = "Tool call with streaming" if chat_request.stream else "Tool call"
-            error = f"{tool_call_info} is currently not supported by {chat_request.model}"
-
         if error:
             raise HTTPException(
                 status_code=400,
@@ -529,31 +325,29 @@ class BedrockModel(BaseChatModel):
                 continue
         return self._reframe_multi_payloard(messages)
 
-
     def _reframe_multi_payloard(self, messages: list) -> list:
         """ Receive messages and reformat them to comply with the Claude format
-With OpenAI format requests, it's not a problem to repeatedly receive messages from the same role, but
-with Claude format requests, you cannot repeatedly receive messages from the same role.
+        With OpenAI format requests, it's not a problem to repeatedly receive messages from the same role, but
+        with Claude format requests, you cannot repeatedly receive messages from the same role.
 
-This method searches through the OpenAI format messages in order and reformats them to the Claude format.
+        This method searches through the OpenAI format messages in order and reformats them to the Claude format.
 
-```
-openai_format_messages=[
-{"role": "user", "content": "hogehoge"},
-{"role": "user", "content": "fugafuga"},
-]
+        ```
+        openai_format_messages=[
+            {"role": "user", "content": "Hello"},
+            {"role": "user", "content": "Who are you?"},
+        ]
 
-bedrock_format_messages=[
-{
-    "role": "user",
-    "content": [
-        {"text": "hogehoge"},
-        {"text": "fugafuga"}
-    ]
-},
-]
-```
+        bedrock_format_messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"text": "Hello"},
+                    {"text": "Who are you?"}
+                ]
+            },
+        ]
+        ```
         """
         reformatted_messages = []
         current_role = None
@@ -590,7 +384,6 @@ bedrock_format_messages=[
         return reformatted_messages
 
-
     def _parse_request(self, chat_request: ChatRequest) -> dict:
         """Create default converse request body.
@@ -839,7 +632,7 @@ bedrock_format_messages=[
                     }
                 )
             elif isinstance(part, ImageContent):
-                if not self._is_multimodal_supported(model_id):
+                if not self.is_supported_modality(model_id, modality="IMAGE"):
                     raise HTTPException(
                         status_code=400,
                         detail=f"Multimodal message is currently not supported by {model_id}",
@@ -858,23 +651,13 @@ bedrock_format_messages=[
                 continue
         return content_parts
 
-    def _is_tool_call_supported(self, model_id: str, stream: bool = False) -> bool:
-        feature = self._supported_models.get(model_id)
-        if not feature:
-            return False
-        return feature["stream_tool_call"] if stream else feature["tool_call"]
-
-    def _is_multimodal_supported(self, model_id: str) -> bool:
-        feature = self._supported_models.get(model_id)
-        if not feature:
-            return False
-        return feature["multimodal"]
-
-    def _is_system_prompt_supported(self, model_id: str) -> bool:
-        feature = self._supported_models.get(model_id)
-        if not feature:
-            return False
-        return feature["system"]
+    @staticmethod
+    def is_supported_modality(model_id: str, modality: str = "IMAGE") -> bool:
+        # Default to an empty dict so an unknown model ID returns False
+        # instead of raising AttributeError on None.
+        model = bedrock_model_list.get(model_id, {})
+        modalities = model.get('modalities', [])
+        return modality in modalities
 
     def _convert_tool_spec(self, func: Function) -> dict:
         return {
diff --git a/src/api/setting.py b/src/api/setting.py
index 408eff5..9026202 100644
--- a/src/api/setting.py
+++ b/src/api/setting.py
@@ -9,13 +9,6 @@ SUMMARY = "OpenAI-Compatible RESTful APIs for Amazon Bedrock"
 VERSION = "0.1.0"
 DESCRIPTION = """
 Use OpenAI-Compatible RESTful APIs for Amazon Bedrock models.
-
-List of Amazon Bedrock models currently supported:
-- Anthropic Claude 2 / 3 /3.5 (Haiku/Sonnet/Opus)
-- Meta Llama 2 / 3
-- Mistral / Mixtral
-- Cohere Command R / R+
-- Cohere Embedding
 """
 
 DEBUG = os.environ.get("DEBUG", "false").lower() != "false"
@@ -26,3 +19,4 @@ DEFAULT_MODEL = os.environ.get(
 DEFAULT_EMBEDDING_MODEL = os.environ.get(
     "DEFAULT_EMBEDDING_MODEL", "cohere.embed-multilingual-v3"
 )
+ENABLE_CROSS_REGION_INFERENCE = os.environ.get("ENABLE_CROSS_REGION_INFERENCE", "true").lower() != "false"
diff --git a/src/requirements.txt b/src/requirements.txt
index be7a2e7..9d2b47c 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -1,10 +1,9 @@
-fastapi==0.111.0
+fastapi==0.115.6
 pydantic==2.7.1
 uvicorn==0.29.0
 mangum==0.17.0
 tiktoken==0.6.0
 requests==2.32.3
 numpy==1.26.4
-boto3==1.35.49
-botocore==1.35.49
-
+boto3==1.35.81
+botocore==1.35.81
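
For reference, the cross-region prefix rule that `get_inference_region_prefix()` introduces can be exercised on its own. The sketch below restates the rule with the region passed as a parameter purely for illustration; the patched module reads `AWS_REGION` from `api.setting` instead:

```python
def get_inference_region_prefix(aws_region: str) -> str:
    # ap-* regions map to the "apac" inference-profile namespace;
    # every other region uses its first two letters (us, eu, ...).
    if aws_region.startswith('ap-'):
        return 'apac'
    return aws_region[:2]

# A system-defined inference profile ID is this prefix joined to the
# on-demand model ID, e.g. in us-west-2:
#   anthropic.claude-3-5-sonnet-20240620-v1:0
#   -> us.anthropic.claude-3-5-sonnet-20240620-v1:0
assert get_inference_region_prefix('us-west-2') == 'us'
assert get_inference_region_prefix('eu-central-1') == 'eu'
assert get_inference_region_prefix('ap-northeast-1') == 'apac'
```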
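
The new `ENABLE_CROSS_REGION_INFERENCE` setting follows the same parsing convention as the existing `DEBUG` flag: only the literal string `"false"` (case-insensitive) disables it, so values such as `"0"` or `"no"` still enable the feature. A minimal sketch of that behavior, using a hypothetical `flag_enabled` helper (the setting module inlines the expression):

```python
import os

def flag_enabled(name: str, default: str = "true") -> bool:
    # Same convention as DEBUG and ENABLE_CROSS_REGION_INFERENCE in
    # src/api/setting.py: anything other than "false" counts as enabled.
    return os.environ.get(name, default).lower() != "false"

os.environ["ENABLE_CROSS_REGION_INFERENCE"] = "FALSE"
assert flag_enabled("ENABLE_CROSS_REGION_INFERENCE") is False

os.environ["ENABLE_CROSS_REGION_INFERENCE"] = "0"  # still enabled!
assert flag_enabled("ENABLE_CROSS_REGION_INFERENCE") is True
```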
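
Once deployed, the auto-detected list surfaces through the OpenAI-compatible models endpoint, so `list_models()` no longer needs a hand-maintained table. A minimal check, assuming a gateway running locally on port 8000 with an `/api/v1` prefix and a placeholder API key (both deployment-specific assumptions):

```python
from openai import OpenAI

# Base URL and API key are deployment-specific assumptions.
client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="placeholder")

model_ids = [m.id for m in client.models.list()]

# With ENABLE_CROSS_REGION_INFERENCE left at its default ("true"),
# system-defined profile IDs such as
# "us.anthropic.claude-3-5-sonnet-20241022-v2:0" appear alongside the
# plain ON_DEMAND model IDs from ListFoundationModels.
print("\n".join(sorted(model_ids)))
```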