From d4938a0af2f48389f4ec4af12139b988e2627e36 Mon Sep 17 00:00:00 2001
From: Aiden Dai
Date: Mon, 16 Dec 2024 16:01:19 +0800
Subject: [PATCH] Automatically detect model list

---
 src/api/models/bedrock.py | 425 ++++++++++----------------
 src/api/setting.py        |   8 +-
 src/requirements.txt      |   7 +-
 3 files changed, 108 insertions(+), 332 deletions(-)

diff --git a/src/api/models/bedrock.py b/src/api/models/bedrock.py
index f59856a..dff1c5b 100644
--- a/src/api/models/bedrock.py
+++ b/src/api/models/bedrock.py
@@ -7,10 +7,10 @@ from abc import ABC
 from typing import AsyncIterable, Iterable, Literal
 
 import boto3
-from botocore.config import Config
 import numpy as np
 import requests
 import tiktoken
+from botocore.config import Config
 from fastapi import HTTPException
 
 from api.models.base import BaseChatModel, BaseEmbeddingsModel
@@ -37,9 +37,8 @@ from api.schema import (
     EmbeddingsUsage,
     Embedding,
-
 )
-from api.setting import DEBUG, AWS_REGION
+from api.setting import DEBUG, AWS_REGION, ENABLE_CROSS_REGION_INFERENCE
 
 logger = logging.getLogger(__name__)
 
@@ -50,6 +49,21 @@ bedrock_runtime = boto3.client(
     region_name=AWS_REGION,
     config=config,
 )
+bedrock_client = boto3.client(
+    service_name='bedrock',
+    region_name=AWS_REGION,
+    config=config,
+)
+
+
+def get_inference_region_prefix():
+    if AWS_REGION.startswith('ap-'):
+        return 'apac'
+    return AWS_REGION[:2]
+
+
+# https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html
+cr_inference_prefix = get_inference_region_prefix()
 
 SUPPORTED_BEDROCK_EMBEDDING_MODELS = {
     "cohere.embed-multilingual-v3": "Cohere Embed Multilingual",
@@ -62,296 +76,78 @@ SUPPORTED_BEDROCK_EMBEDDING_MODELS = {
 
 ENCODER = tiktoken.get_encoding("cl100k_base")
 
+def list_bedrock_models() -> dict:
+    """Automatically get a list of supported models.
+
+    Returns a model list that combines:
+    - ON_DEMAND models.
+    - Cross-region inference profiles (if enabled via environment variable).
+    """
+    model_list = {}
+    try:
+        profile_list = []
+        if ENABLE_CROSS_REGION_INFERENCE:
+            # List system-defined inference profile IDs.
+            response = bedrock_client.list_inference_profiles(
+                maxResults=1000,
+                typeEquals='SYSTEM_DEFINED'
+            )
+            profile_list = [p['inferenceProfileId'] for p in response['inferenceProfileSummaries']]
+
+        # List foundation models; only text-output models are relevant here.
+        response = bedrock_client.list_foundation_models(
+            byOutputModality='TEXT'
+        )
+
+        for model in response['modelSummaries']:
+            model_id = model.get('modelId', 'N/A')
+            stream_supported = model.get('responseStreamingSupported', True)
+            status = model['modelLifecycle'].get('status', 'ACTIVE')
+
+            # Currently used to filter out rerank models and legacy models.
+            if not stream_supported or status != "ACTIVE":
+                continue
+
+            inference_types = model.get('inferenceTypesSupported', [])
+            input_modalities = model['inputModalities']
+            # Add to the on-demand model list.
+            if 'ON_DEMAND' in inference_types:
+                model_list[model_id] = {
+                    'modalities': input_modalities
+                }
+
+            # Add to the cross-region inference model list.
+            profile_id = cr_inference_prefix + '.' + model_id
+            if profile_id in profile_list:
+                model_list[profile_id] = {
+                    'modalities': input_modalities
+                }
+
+    except Exception as e:
+        logger.error(f"Unable to list models: {str(e)}")
+
+    return model_list
+
+
+# Initialize the model list.
+bedrock_model_list = list_bedrock_models() + + class BedrockModel(BaseChatModel): - # https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html#conversation-inference-supported-models-features - _supported_models = { - "amazon.titan-text-premier-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "anthropic.claude-instant-v1": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "anthropic.claude-v2:1": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "anthropic.claude-v2": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "anthropic.claude-3-sonnet-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "anthropic.claude-3-opus-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "anthropic.claude-3-haiku-20240307-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "anthropic.claude-3-5-sonnet-20240620-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "anthropic.claude-3-5-sonnet-20241022-v2:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "meta.llama2-13b-chat-v1": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "meta.llama2-70b-chat-v1": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "meta.llama3-8b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "meta.llama3-70b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - # Llama 3.1 8b cross-region inference profile - "us.meta.llama3-1-8b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "meta.llama3-1-8b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - # Llama 3.1 70b cross-region inference profile - "us.meta.llama3-1-70b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "meta.llama3-1-70b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "meta.llama3-1-405b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - # Llama 3.2 1B cross-region inference profile - "us.meta.llama3-2-1b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - # Llama 3.2 3B cross-region inference profile - "us.meta.llama3-2-3b-instruct-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - # Llama 3.2 11B cross-region inference profile - "us.meta.llama3-2-11b-instruct-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": False, - }, - # Llama 3.2 90B cross-region inference profile - "us.meta.llama3-2-90b-instruct-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": False, - }, - "mistral.mistral-7b-instruct-v0:2": { - 
"system": False, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "mistral.mixtral-8x7b-instruct-v0:1": { - "system": False, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "mistral.mistral-small-2402-v1:0": { - "system": True, - "multimodal": False, - "tool_call": False, - "stream_tool_call": False, - }, - "mistral.mistral-large-2402-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "mistral.mistral-large-2407-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "cohere.command-r-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "cohere.command-r-plus-v1:0": { - "system": True, - "multimodal": False, - "tool_call": True, - "stream_tool_call": False, - }, - "apac.anthropic.claude-3-sonnet-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "apac.anthropic.claude-3-haiku-20240307-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "apac.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3 Haiku cross-region inference profile - "us.anthropic.claude-3-haiku-20240307-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "eu.anthropic.claude-3-haiku-20240307-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3 Opus cross-region inference profile - "us.anthropic.claude-3-opus-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3 Sonnet cross-region inference profile - "us.anthropic.claude-3-sonnet-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "eu.anthropic.claude-3-sonnet-20240229-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3.5 Sonnet cross-region inference profile - "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # claude 3.5 Sonnet v2 cross-region inference profile(Now only us-west-2) - "us.anthropic.claude-3-5-sonnet-20241022-v2:0": { - "system": True, - "multimodal": True, - "tool_call": True, - "stream_tool_call": True, - }, - # Amazon Nova models - AWS's proprietary large language models - "us.amazon.nova-lite-v1:0": { - "system": True, # Supports system prompts for context setting - "multimodal": True, # Capable of processing both text and images - "tool_call": True, - "stream_tool_call": True, - }, - "us.amazon.nova-micro-v1:0": { - "system": True, # Supports system prompts for context setting - "multimodal": False, # Text-only model, no image processing capabilities - "tool_call": True, - "stream_tool_call": True, - }, - "us.amazon.nova-pro-v1:0": { - "system": True, # Supports system prompts for context setting - "multimodal": True, # Capable of processing both text and images - "tool_call": True, - "stream_tool_call": True, - }, - } def list_models(self) -> list[str]: - return 
list(self._supported_models.keys())
+        """Refresh and return the latest model list."""
+        global bedrock_model_list
+        bedrock_model_list = list_bedrock_models()
+        return list(bedrock_model_list.keys())
 
     def validate(self, chat_request: ChatRequest):
         """Perform basic validation on requests"""
         error = ""
         # check if model is supported
-        if chat_request.model not in self._supported_models.keys():
+        if chat_request.model not in bedrock_model_list.keys():
             error = f"Unsupported model {chat_request.model}, please use models API to get a list of supported models"
 
-        # check if tool call is supported
-        elif chat_request.tools and not self._is_tool_call_supported(chat_request.model, stream=chat_request.stream):
-            tool_call_info = "Tool call with streaming" if chat_request.stream else "Tool call"
-            error = f"{tool_call_info} is currently not supported by {chat_request.model}"
-
         if error:
             raise HTTPException(
                 status_code=400,
@@ -529,31 +325,29 @@ class BedrockModel(BaseChatModel):
                 continue
         return self._reframe_multi_payloard(messages)
 
-
     def _reframe_multi_payloard(self, messages: list) -> list:
         """ Receive messages and reformat them to comply with the Claude format
-With OpenAI format requests, it's not a problem to repeatedly receive messages from the same role, but
-with Claude format requests, you cannot repeatedly receive messages from the same role.
+        With OpenAI format requests, it's not a problem to repeatedly receive messages from the same role, but
+        with Claude format requests, you cannot repeatedly receive messages from the same role.
 
-This method searches through the OpenAI format messages in order and reformats them to the Claude format.
+        This method searches through the OpenAI format messages in order and reformats them to the Claude format.
 
-```
-openai_format_messages=[
-{"role": "user", "content": "hogehoge"},
-{"role": "user", "content": "fugafuga"},
-]
+        ```
+        openai_format_messages=[
+            {"role": "user", "content": "Hello"},
+            {"role": "user", "content": "Who are you?"},
+        ]
 
-bedrock_format_messages=[
-{
-    "role": "user",
-    "content": [
-        {"text": "hogehoge"},
-        {"text": "fugafuga"}
-    ]
-},
-]
-```
+        bedrock_format_messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"text": "Hello"},
+                    {"text": "Who are you?"}
+                ]
+            },
+        ]
+        ```
         """
         reformatted_messages = []
         current_role = None
@@ -590,7 +384,6 @@ bedrock_format_messages=[
         return reformatted_messages
 
-
     def _parse_request(self, chat_request: ChatRequest) -> dict:
         """Create default converse request body.
@@ -839,7 +632,7 @@ bedrock_format_messages=[
                     }
                 )
             elif isinstance(part, ImageContent):
-                if not self._is_multimodal_supported(model_id):
+                if not self.is_supported_modality(model_id, modality="IMAGE"):
                     raise HTTPException(
                         status_code=400,
                         detail=f"Multimodal message is currently not supported by {model_id}",
@@ -858,23 +651,13 @@ bedrock_format_messages=[
                 continue
         return content_parts
 
-    def _is_tool_call_supported(self, model_id: str, stream: bool = False) -> bool:
-        feature = self._supported_models.get(model_id)
-        if not feature:
-            return False
-        return feature["stream_tool_call"] if stream else feature["tool_call"]
-
-    def _is_multimodal_supported(self, model_id: str) -> bool:
-        feature = self._supported_models.get(model_id)
-        if not feature:
-            return False
-        return feature["multimodal"]
-
-    def _is_system_prompt_supported(self, model_id: str) -> bool:
-        feature = self._supported_models.get(model_id)
-        if not feature:
-            return False
-        return feature["system"]
+    @staticmethod
+    def is_supported_modality(model_id: str, modality: str = "IMAGE") -> bool:
+        # Default to an empty dict so an unknown model ID returns False
+        # instead of raising AttributeError on None.
+        model = bedrock_model_list.get(model_id, {})
+        modalities = model.get('modalities', [])
+        return modality in modalities
 
     def _convert_tool_spec(self, func: Function) -> dict:
         return {
diff --git a/src/api/setting.py b/src/api/setting.py
index 408eff5..9026202 100644
--- a/src/api/setting.py
+++ b/src/api/setting.py
@@ -9,13 +9,6 @@ SUMMARY = "OpenAI-Compatible RESTful APIs for Amazon Bedrock"
 VERSION = "0.1.0"
 DESCRIPTION = """
 Use OpenAI-Compatible RESTful APIs for Amazon Bedrock models.
-
-List of Amazon Bedrock models currently supported:
-- Anthropic Claude 2 / 3 /3.5 (Haiku/Sonnet/Opus)
-- Meta Llama 2 / 3
-- Mistral / Mixtral
-- Cohere Command R / R+
-- Cohere Embedding
 """
 
 DEBUG = os.environ.get("DEBUG", "false").lower() != "false"
@@ -26,3 +19,4 @@ DEFAULT_MODEL = os.environ.get(
 DEFAULT_EMBEDDING_MODEL = os.environ.get(
     "DEFAULT_EMBEDDING_MODEL", "cohere.embed-multilingual-v3"
 )
+ENABLE_CROSS_REGION_INFERENCE = os.environ.get("ENABLE_CROSS_REGION_INFERENCE", "true").lower() != "false"
diff --git a/src/requirements.txt b/src/requirements.txt
index be7a2e7..9d2b47c 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -1,10 +1,9 @@
-fastapi==0.111.0
+fastapi==0.115.6
 pydantic==2.7.1
 uvicorn==0.29.0
 mangum==0.17.0
 tiktoken==0.6.0
 requests==2.32.3
 numpy==1.26.4
-boto3==1.35.49
-botocore==1.35.49
-
+boto3==1.35.81
+botocore==1.35.81
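
For reference, the cross-region prefix rule that `get_inference_region_prefix()` introduces can be exercised on its own. The sketch below restates the rule with the region passed as a parameter purely for illustration; the patched module reads `AWS_REGION` from `api.setting` instead:

```python
def get_inference_region_prefix(aws_region: str) -> str:
    # ap-* regions map to the "apac" inference-profile namespace;
    # every other region uses its first two letters (us, eu, ...).
    if aws_region.startswith('ap-'):
        return 'apac'
    return aws_region[:2]

# A system-defined inference profile ID is this prefix joined to the
# on-demand model ID, e.g. in us-west-2:
#   anthropic.claude-3-5-sonnet-20240620-v1:0
#   -> us.anthropic.claude-3-5-sonnet-20240620-v1:0
assert get_inference_region_prefix('us-west-2') == 'us'
assert get_inference_region_prefix('eu-central-1') == 'eu'
assert get_inference_region_prefix('ap-northeast-1') == 'apac'
```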
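
The new `ENABLE_CROSS_REGION_INFERENCE` setting follows the same parsing convention as the existing `DEBUG` flag: only the literal string `"false"` (case-insensitive) disables it, so values such as `"0"` or `"no"` still enable the feature. A minimal sketch of that behavior, using a hypothetical `flag_enabled` helper (the setting module inlines the expression):

```python
import os

def flag_enabled(name: str, default: str = "true") -> bool:
    # Same convention as DEBUG and ENABLE_CROSS_REGION_INFERENCE in
    # src/api/setting.py: anything other than "false" counts as enabled.
    return os.environ.get(name, default).lower() != "false"

os.environ["ENABLE_CROSS_REGION_INFERENCE"] = "FALSE"
assert flag_enabled("ENABLE_CROSS_REGION_INFERENCE") is False

os.environ["ENABLE_CROSS_REGION_INFERENCE"] = "0"  # still enabled!
assert flag_enabled("ENABLE_CROSS_REGION_INFERENCE") is True
```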
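
Once deployed, the auto-detected list surfaces through the OpenAI-compatible models endpoint, so `list_models()` no longer needs a hand-maintained table. A minimal check, assuming a gateway running locally on port 8000 with an `/api/v1` prefix and a placeholder API key (both deployment-specific assumptions):

```python
from openai import OpenAI

# Base URL and API key are deployment-specific assumptions.
client = OpenAI(base_url="http://localhost:8000/api/v1", api_key="placeholder")

model_ids = [m.id for m in client.models.list()]

# With ENABLE_CROSS_REGION_INFERENCE left at its default ("true"),
# system-defined profile IDs such as
# "us.anthropic.claude-3-5-sonnet-20241022-v2:0" appear alongside the
# plain ON_DEMAND model IDs from ListFoundationModels.
print("\n".join(sorted(model_ids)))
```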