Automatically detect model list
@@ -7,10 +7,10 @@ from abc import ABC

from typing import AsyncIterable, Iterable, Literal

import boto3
from botocore.config import Config
import numpy as np
import requests
import tiktoken
from botocore.config import Config
from fastapi import HTTPException

from api.models.base import BaseChatModel, BaseEmbeddingsModel
@@ -37,9 +37,8 @@ from api.schema import (
    EmbeddingsUsage,
    Embedding,
)
from api.setting import DEBUG, AWS_REGION
from api.setting import DEBUG, AWS_REGION, ENABLE_CROSS_REGION_INFERENCE

logger = logging.getLogger(__name__)

@@ -50,6 +49,21 @@ bedrock_runtime = boto3.client(
    region_name=AWS_REGION,
    config=config,
)
bedrock_client = boto3.client(
    service_name='bedrock',
    region_name=AWS_REGION,
    config=config,
)

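The shared `config` object passed to both clients is defined above this hunk and is not part of the diff; as a rough sketch only, a typical botocore setup for these clients might look like the following (all values are assumptions, not taken from the commit):

```python
from botocore.config import Config

# Assumed example; the actual config lives earlier in the module.
config = Config(
    connect_timeout=60,
    read_timeout=120,
    retries={"max_attempts": 3, "mode": "standard"},
)
```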
def get_inference_region_prefix():
    if AWS_REGION.startswith('ap-'):
        return 'apac'
    return AWS_REGION[:2]


# https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html
cr_inference_prefix = get_inference_region_prefix()

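get_inference_region_prefix maps the gateway's region to the prefix that system-defined inference profiles use ('us', 'eu', or 'apac'); list_bedrock_models later joins this prefix with a model ID to find the matching profile. A small illustrative check (region values are examples, not from this diff):

```python
# Illustrative only: how a cross-region profile ID is derived from the prefix.
def to_profile_id(region: str, model_id: str) -> str:
    prefix = 'apac' if region.startswith('ap-') else region[:2]
    return f"{prefix}.{model_id}"

assert to_profile_id("us-west-2", "anthropic.claude-3-haiku-20240307-v1:0") == \
    "us.anthropic.claude-3-haiku-20240307-v1:0"
assert to_profile_id("ap-northeast-1", "anthropic.claude-3-haiku-20240307-v1:0") == \
    "apac.anthropic.claude-3-haiku-20240307-v1:0"
```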
SUPPORTED_BEDROCK_EMBEDDING_MODELS = {
    "cohere.embed-multilingual-v3": "Cohere Embed Multilingual",
@@ -62,296 +76,78 @@ SUPPORTED_BEDROCK_EMBEDDING_MODELS = {
ENCODER = tiktoken.get_encoding("cl100k_base")


def list_bedrock_models() -> dict:
    """Automatically get a list of supported models.

    Returns a model list that combines:
        - ON_DEMAND models.
        - Cross-region inference profiles (if enabled via env var).
    """
    model_list = {}
    try:
        profile_list = []
        if ENABLE_CROSS_REGION_INFERENCE:
            # List system-defined inference profile IDs
            response = bedrock_client.list_inference_profiles(
                maxResults=1000,
                typeEquals='SYSTEM_DEFINED'
            )
            profile_list = [p['inferenceProfileId'] for p in response['inferenceProfileSummaries']]

        # List foundation models; only text-output models are of interest here.
        response = bedrock_client.list_foundation_models(
            byOutputModality='TEXT'
        )

        for model in response['modelSummaries']:
            model_id = model.get('modelId', 'N/A')
            stream_supported = model.get('responseStreamingSupported', True)
            status = model['modelLifecycle'].get('status', 'ACTIVE')

            # Currently used to filter out rerank models and legacy models.
            if not stream_supported or status != "ACTIVE":
                continue

            inference_types = model.get('inferenceTypesSupported', [])
            input_modalities = model['inputModalities']
            # Add on-demand models to the list.
            if 'ON_DEMAND' in inference_types:
                model_list[model_id] = {
                    'modalities': input_modalities
                }

            # Add the matching cross-region inference profile, if one exists.
            profile_id = cr_inference_prefix + '.' + model_id
            if profile_id in profile_list:
                model_list[profile_id] = {
                    'modalities': input_modalities
                }

    except Exception as e:
        logger.error(f"Unable to list models: {str(e)}")

    return model_list


# Initialize the model list.
bedrock_model_list = list_bedrock_models()

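The resulting bedrock_model_list maps each usable model ID (and, where available, its cross-region profile ID) to its input modalities. The exact contents depend on the account and region; the snippet below is only an assumed illustration of the shape:

```python
# Assumed example output; the real dict is built from the Bedrock APIs at startup.
bedrock_model_list = {
    "anthropic.claude-3-haiku-20240307-v1:0": {"modalities": ["TEXT", "IMAGE"]},
    "us.anthropic.claude-3-haiku-20240307-v1:0": {"modalities": ["TEXT", "IMAGE"]},
    "mistral.mistral-large-2407-v1:0": {"modalities": ["TEXT"]},
}
```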
class BedrockModel(BaseChatModel):
    # https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html#conversation-inference-supported-models-features
    _supported_models = {
        "amazon.titan-text-premier-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "anthropic.claude-instant-v1": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "anthropic.claude-v2:1": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "anthropic.claude-v2": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "anthropic.claude-3-sonnet-20240229-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "anthropic.claude-3-opus-20240229-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "anthropic.claude-3-haiku-20240307-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "anthropic.claude-3-5-sonnet-20240620-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "anthropic.claude-3-5-sonnet-20241022-v2:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "meta.llama2-13b-chat-v1": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "meta.llama2-70b-chat-v1": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "meta.llama3-8b-instruct-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "meta.llama3-70b-instruct-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        # Llama 3.1 8b cross-region inference profile
        "us.meta.llama3-1-8b-instruct-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": True,
            "stream_tool_call": False,
        },
        "meta.llama3-1-8b-instruct-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": True,
            "stream_tool_call": False,
        },
        # Llama 3.1 70b cross-region inference profile
        "us.meta.llama3-1-70b-instruct-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": True,
            "stream_tool_call": False,
        },
        "meta.llama3-1-70b-instruct-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": True,
            "stream_tool_call": False,
        },
        "meta.llama3-1-405b-instruct-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": True,
            "stream_tool_call": False,
        },
        # Llama 3.2 1B cross-region inference profile
        "us.meta.llama3-2-1b-instruct-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        # Llama 3.2 3B cross-region inference profile
        "us.meta.llama3-2-3b-instruct-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        # Llama 3.2 11B cross-region inference profile
        "us.meta.llama3-2-11b-instruct-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": False,
        },
        # Llama 3.2 90B cross-region inference profile
        "us.meta.llama3-2-90b-instruct-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": False,
        },
        "mistral.mistral-7b-instruct-v0:2": {
            "system": False,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "mistral.mixtral-8x7b-instruct-v0:1": {
            "system": False,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "mistral.mistral-small-2402-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": False,
            "stream_tool_call": False,
        },
        "mistral.mistral-large-2402-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": True,
            "stream_tool_call": False,
        },
        "mistral.mistral-large-2407-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": True,
            "stream_tool_call": False,
        },
        "cohere.command-r-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": True,
            "stream_tool_call": False,
        },
        "cohere.command-r-plus-v1:0": {
            "system": True,
            "multimodal": False,
            "tool_call": True,
            "stream_tool_call": False,
        },
        "apac.anthropic.claude-3-sonnet-20240229-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "apac.anthropic.claude-3-haiku-20240307-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "apac.anthropic.claude-3-5-sonnet-20240620-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        # claude 3 Haiku cross-region inference profile
        "us.anthropic.claude-3-haiku-20240307-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "eu.anthropic.claude-3-haiku-20240307-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        # claude 3 Opus cross-region inference profile
        "us.anthropic.claude-3-opus-20240229-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        # claude 3 Sonnet cross-region inference profile
        "us.anthropic.claude-3-sonnet-20240229-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "eu.anthropic.claude-3-sonnet-20240229-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        # claude 3.5 Sonnet cross-region inference profile
        "us.anthropic.claude-3-5-sonnet-20240620-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        # claude 3.5 Sonnet v2 cross-region inference profile (Now only us-west-2)
        "us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
            "system": True,
            "multimodal": True,
            "tool_call": True,
            "stream_tool_call": True,
        },
        # Amazon Nova models - AWS's proprietary large language models
        "us.amazon.nova-lite-v1:0": {
            "system": True,  # Supports system prompts for context setting
            "multimodal": True,  # Capable of processing both text and images
            "tool_call": True,
            "stream_tool_call": True,
        },
        "us.amazon.nova-micro-v1:0": {
            "system": True,  # Supports system prompts for context setting
            "multimodal": False,  # Text-only model, no image processing capabilities
            "tool_call": True,
            "stream_tool_call": True,
        },
        "us.amazon.nova-pro-v1:0": {
            "system": True,  # Supports system prompts for context setting
            "multimodal": True,  # Capable of processing both text and images
            "tool_call": True,
            "stream_tool_call": True,
        },
    }

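Each entry in this pre-existing hardcoded table is a small capability record keyed by model ID; a hypothetical lookup, shown for illustration only:

```python
# Hypothetical lookup against the hardcoded capability table.
features = BedrockModel._supported_models.get("cohere.command-r-v1:0", {})
features.get("tool_call", False)         # True: tool calls are supported
features.get("stream_tool_call", False)  # False: but not while streaming
```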
    def list_models(self) -> list[str]:
        return list(self._supported_models.keys())
        """Always refresh the latest model list"""
        global bedrock_model_list
        bedrock_model_list = list_bedrock_models()
        return list(bedrock_model_list.keys())

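With this change, list_models re-runs the discovery on every call instead of echoing the hardcoded keys; a minimal usage sketch (constructor arguments, if any, are assumed to be none):

```python
# Sketch only; instantiation details are assumed.
chat_model = BedrockModel()
models = chat_model.list_models()  # refreshes bedrock_model_list, then returns its keys
print(f"{len(models)} Bedrock models available")
```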
    def validate(self, chat_request: ChatRequest):
        """Perform basic validation on requests"""
        error = ""
        # check if model is supported
        if chat_request.model not in self._supported_models.keys():
        if chat_request.model not in bedrock_model_list.keys():
            error = f"Unsupported model {chat_request.model}, please use models API to get a list of supported models"

        # check if tool call is supported
        elif chat_request.tools and not self._is_tool_call_supported(chat_request.model, stream=chat_request.stream):
            tool_call_info = "Tool call with streaming" if chat_request.stream else "Tool call"
            error = f"{tool_call_info} is currently not supported by {chat_request.model}"

        if error:
            raise HTTPException(
                status_code=400,
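A request naming a model absent from bedrock_model_list is now rejected with a 400 before any Bedrock call is made. A rough sketch, assuming ChatRequest accepts the fields used above plus a messages list (the exact schema is not shown in this diff):

```python
# Sketch only: the model ID is deliberately bogus and the ChatRequest fields are assumed.
bad_request = ChatRequest(
    model="anthropic.claude-unknown-v9",
    messages=[{"role": "user", "content": "Hi"}],
)
try:
    BedrockModel().validate(bad_request)
except HTTPException as exc:
    assert exc.status_code == 400
```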
@@ -529,31 +325,29 @@ class BedrockModel(BaseChatModel):
                continue
        return self._reframe_multi_payloard(messages)

    def _reframe_multi_payloard(self, messages: list) -> list:
        """Receive messages and reformat them to comply with the Claude format.

        With OpenAI-format requests, it is fine to receive repeated messages from the same role,
        but Claude-format requests do not allow consecutive messages from the same role.

        This method walks the OpenAI-format messages in order and reformats them into the Claude format.

        ```
        openai_format_messages=[
            {"role": "user", "content": "Hello"},
            {"role": "user", "content": "Who are you?"},
        ]

        bedrock_format_messages=[
            {
                "role": "user",
                "content": [
                    {"text": "Hello"},
                    {"text": "Who are you?"}
                ]
            },
        ]
        ```
        """
        reformatted_messages = []
        current_role = None
@@ -590,7 +384,6 @@ bedrock_format_messages=[

        return reformatted_messages

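The body of _reframe_multi_payloard is elided by this hunk; based on the docstring, the merging step can be sketched as below (an assumption about the behavior, not the committed implementation):

```python
def merge_same_role(messages: list[dict]) -> list[dict]:
    """Collapse consecutive same-role OpenAI messages into Claude-style content lists."""
    merged: list[dict] = []
    for message in messages:
        part = {"text": message["content"]}
        if merged and merged[-1]["role"] == message["role"]:
            merged[-1]["content"].append(part)
        else:
            merged.append({"role": message["role"], "content": [part]})
    return merged

# merge_same_role([{"role": "user", "content": "Hello"},
#                  {"role": "user", "content": "Who are you?"}])
# -> [{"role": "user", "content": [{"text": "Hello"}, {"text": "Who are you?"}]}]
```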
    def _parse_request(self, chat_request: ChatRequest) -> dict:
        """Create default converse request body.

@@ -839,7 +632,7 @@ bedrock_format_messages=[
                    }
                )
            elif isinstance(part, ImageContent):
                if not self._is_multimodal_supported(model_id):
                if not self.is_supported_modality(model_id, modality="IMAGE"):
                    raise HTTPException(
                        status_code=400,
                        detail=f"Multimodal message is currently not supported by {model_id}",
@@ -858,23 +651,13 @@ bedrock_format_messages=[
                continue
        return content_parts

    def _is_tool_call_supported(self, model_id: str, stream: bool = False) -> bool:
        feature = self._supported_models.get(model_id)
        if not feature:
            return False
        return feature["stream_tool_call"] if stream else feature["tool_call"]

    def _is_multimodal_supported(self, model_id: str) -> bool:
        feature = self._supported_models.get(model_id)
        if not feature:
            return False
        return feature["multimodal"]

    def _is_system_prompt_supported(self, model_id: str) -> bool:
        feature = self._supported_models.get(model_id)
        if not feature:
            return False
        return feature["system"]

    @staticmethod
    def is_supported_modality(model_id: str, modality: str = "IMAGE") -> bool:
        model = bedrock_model_list.get(model_id)
        modalities = model.get('modalities', [])
        if modality in modalities:
            return True
        return False

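is_supported_modality replaces the hardcoded _is_multimodal_supported check by consulting the discovered bedrock_model_list; an illustrative call (assuming this model ID was discovered at startup):

```python
# Illustrative only; whether this ID is present depends on the discovered list.
if BedrockModel.is_supported_modality("anthropic.claude-3-haiku-20240307-v1:0", modality="IMAGE"):
    print("image parts are accepted for this model")
```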
    def _convert_tool_spec(self, func: Function) -> dict:
        return {

@@ -9,13 +9,6 @@ SUMMARY = "OpenAI-Compatible RESTful APIs for Amazon Bedrock"
VERSION = "0.1.0"
DESCRIPTION = """
Use OpenAI-Compatible RESTful APIs for Amazon Bedrock models.

List of Amazon Bedrock models currently supported:
- Anthropic Claude 2 / 3 / 3.5 (Haiku/Sonnet/Opus)
- Meta Llama 2 / 3
- Mistral / Mixtral
- Cohere Command R / R+
- Cohere Embedding
"""

DEBUG = os.environ.get("DEBUG", "false").lower() != "false"
@@ -26,3 +19,4 @@ DEFAULT_MODEL = os.environ.get(
DEFAULT_EMBEDDING_MODEL = os.environ.get(
    "DEFAULT_EMBEDDING_MODEL", "cohere.embed-multilingual-v3"
)
ENABLE_CROSS_REGION_INFERENCE = os.environ.get("ENABLE_CROSS_REGION_INFERENCE", "true").lower() != "false"
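DEBUG and the new ENABLE_CROSS_REGION_INFERENCE flag share the same convention: any value other than the literal string "false" (case-insensitive) enables the feature. A small demonstration of that parsing:

```python
import os

def flag(name: str, default: str = "false") -> bool:
    # Same convention as DEBUG and ENABLE_CROSS_REGION_INFERENCE above.
    return os.environ.get(name, default).lower() != "false"

os.environ["ENABLE_CROSS_REGION_INFERENCE"] = "False"
assert flag("ENABLE_CROSS_REGION_INFERENCE", "true") is False  # "false"/"False" disables
os.environ["ENABLE_CROSS_REGION_INFERENCE"] = "0"
assert flag("ENABLE_CROSS_REGION_INFERENCE", "true") is True   # any other value enables
```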