Partial support of reasoning
This commit is contained in:
@@ -420,6 +420,19 @@ class BedrockModel(BaseChatModel):
|
|||||||
"system": system_prompts,
|
"system": system_prompts,
|
||||||
"inferenceConfig": inference_config,
|
"inferenceConfig": inference_config,
|
||||||
}
|
}
|
||||||
|
if chat_request.reasoning_effort:
|
||||||
|
# In the OpenAI API, max_tokens is not supported in reasoning mode.
|
||||||
|
# Use max_completion_tokens if provided.
|
||||||
|
max_tokens = chat_request.max_completion_tokens if chat_request.max_completion_tokens else chat_request.max_tokens
|
||||||
|
inference_config["maxTokens"] = max_tokens
|
||||||
|
# Unset topP - not supported when reasoning is enabled
|
||||||
|
inference_config.pop("topP")
|
||||||
|
args["additionalModelRequestFields"] = {
|
||||||
|
"reasoning_config": {
|
||||||
|
"type": "enabled",
|
||||||
|
"budget_tokens": max_tokens - 1
|
||||||
|
}
|
||||||
|
}
|
||||||
# add tool config
|
# add tool config
|
||||||
if chat_request.tools:
|
if chat_request.tools:
|
||||||
args["toolConfig"] = {
|
args["toolConfig"] = {
|
||||||
@@ -476,8 +489,13 @@ class BedrockModel(BaseChatModel):
|
|||||||
message.content = None
|
message.content = None
|
||||||
else:
|
else:
|
||||||
message.content = ""
|
message.content = ""
|
||||||
if content:
|
for c in content:
|
||||||
message.content = content[0]["text"]
|
if "reasoningContent" in c:
|
||||||
|
message.reasoning_content = c["reasoningContent"]["reasoningText"].get("text", "")
|
||||||
|
if "text" in c:
|
||||||
|
message.content = c["text"]
|
||||||
|
else:
|
||||||
|
logger.warning("Unknown tag in message content " + ",".join(c.keys()))
|
||||||
|
|
||||||
response = ChatResponse(
|
response = ChatResponse(
|
||||||
id=message_id,
|
id=message_id,
|
||||||
|
|||||||
@@ -94,6 +94,8 @@ class ChatRequest(BaseModel):
|
|||||||
top_p: float | None = Field(default=1.0, le=1.0, ge=0.0)
|
top_p: float | None = Field(default=1.0, le=1.0, ge=0.0)
|
||||||
user: str | None = None # Not used
|
user: str | None = None # Not used
|
||||||
max_tokens: int | None = 2048
|
max_tokens: int | None = 2048
|
||||||
|
max_completion_tokens: int | None = None
|
||||||
|
reasoning_effort: Literal["low", "medium", "high"] | None = None
|
||||||
n: int | None = 1 # Not used
|
n: int | None = 1 # Not used
|
||||||
tools: list[Tool] | None = None
|
tools: list[Tool] | None = None
|
||||||
tool_choice: str | object = "auto"
|
tool_choice: str | object = "auto"
|
||||||
@@ -111,6 +113,7 @@ class ChatResponseMessage(BaseModel):
|
|||||||
role: Literal["assistant"] | None = None
|
role: Literal["assistant"] | None = None
|
||||||
content: str | None = None
|
content: str | None = None
|
||||||
tool_calls: list[ToolCall] | None = None
|
tool_calls: list[ToolCall] | None = None
|
||||||
|
reasoning_content: str | None = None
|
||||||
|
|
||||||
|
|
||||||
class BaseChoice(BaseModel):
|
class BaseChoice(BaseModel):
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
fastapi==0.115.6
|
fastapi==0.115.8
|
||||||
pydantic==2.7.1
|
pydantic==2.7.1
|
||||||
uvicorn==0.29.0
|
uvicorn==0.29.0
|
||||||
mangum==0.17.0
|
mangum==0.17.0
|
||||||
tiktoken==0.6.0
|
tiktoken==0.6.0
|
||||||
requests==2.32.3
|
requests==2.32.3
|
||||||
numpy==1.26.4
|
numpy==1.26.4
|
||||||
boto3==1.35.81
|
boto3==1.37.0
|
||||||
botocore==1.35.81
|
botocore==1.37.0
|
||||||
Reference in New Issue
Block a user