Partial support for reasoning

This commit is contained in:
Aiden Dai
2025-02-25 16:22:26 +08:00
parent b26ee3e9ea
commit 3ce47ff278
3 changed files with 26 additions and 5 deletions

View File

@@ -420,6 +420,19 @@ class BedrockModel(BaseChatModel):
"system": system_prompts, "system": system_prompts,
"inferenceConfig": inference_config, "inferenceConfig": inference_config,
} }
if chat_request.reasoning_effort:
            # Per the OpenAI API, max_tokens is not supported in reasoning mode;
            # use max_completion_tokens if provided.
max_tokens = chat_request.max_completion_tokens if chat_request.max_completion_tokens else chat_request.max_tokens
inference_config["maxTokens"] = max_tokens
# unset topP - Not supported
inference_config.pop("topP")
args["additionalModelRequestFields"] = {
"reasoning_config": {
"type": "enabled",
"budget_tokens": max_tokens - 1
}
}
# add tool config # add tool config
if chat_request.tools: if chat_request.tools:
args["toolConfig"] = { args["toolConfig"] = {
@@ -476,8 +489,13 @@ class BedrockModel(BaseChatModel):
message.content = None message.content = None
else: else:
message.content = "" message.content = ""
if content: for c in content:
message.content = content[0]["text"] if "reasoningContent" in c:
message.reasoning_content = c["reasoningContent"]["reasoningText"].get("text", "")
if "text" in c:
message.content = c["text"]
else:
logger.warning("Unknown tag in message content " + ",".join(c.keys()))
response = ChatResponse( response = ChatResponse(
id=message_id, id=message_id,

View File

@@ -94,6 +94,8 @@ class ChatRequest(BaseModel):
top_p: float | None = Field(default=1.0, le=1.0, ge=0.0) top_p: float | None = Field(default=1.0, le=1.0, ge=0.0)
user: str | None = None # Not used user: str | None = None # Not used
max_tokens: int | None = 2048 max_tokens: int | None = 2048
max_completion_tokens: int | None = None
reasoning_effort: Literal["low", "medium", "high"] | None = None
n: int | None = 1 # Not used n: int | None = 1 # Not used
tools: list[Tool] | None = None tools: list[Tool] | None = None
tool_choice: str | object = "auto" tool_choice: str | object = "auto"
@@ -111,6 +113,7 @@ class ChatResponseMessage(BaseModel):
role: Literal["assistant"] | None = None role: Literal["assistant"] | None = None
content: str | None = None content: str | None = None
tool_calls: list[ToolCall] | None = None tool_calls: list[ToolCall] | None = None
reasoning_content: str | None = None
class BaseChoice(BaseModel): class BaseChoice(BaseModel):

View File

@@ -1,9 +1,9 @@
fastapi==0.115.6 fastapi==0.115.8
pydantic==2.7.1 pydantic==2.7.1
uvicorn==0.29.0 uvicorn==0.29.0
mangum==0.17.0 mangum==0.17.0
tiktoken==0.6.0 tiktoken==0.6.0
requests==2.32.3 requests==2.32.3
numpy==1.26.4 numpy==1.26.4
boto3==1.35.81 boto3==1.37.0
botocore==1.35.81 botocore==1.37.0