Files
bedrock-access-gateway/deployment/BedrockProxy.template
Kane Zhu b4800c54a0 feat: add prompt caching support for Claude and Nova models
Add comprehensive prompt caching support with flexible control options:

Features:
- ENV variable control (ENABLE_PROMPT_CACHING, default: false)
- Per-request control via extra_body.prompt_caching
- Pattern-based model detection (Claude, Nova)
- Token limit warnings (Nova 20K limit)
- OpenAI-compatible response format (prompt_tokens_details.cached_tokens)

Supported models:
- Claude 3+ models (anthropic.claude-*)
- Nova models (amazon.nova-*)
- Auto-detection prevents breaking unsupported models

Implementation:
- System prompts caching via extra_body.prompt_caching.system
- Messages caching via extra_body.prompt_caching.messages
- Non-streaming and streaming modes
- Compatible with reasoning, thinking, and tool calls
2025-10-15 11:03:19 +08:00

288 lines
8.5 KiB
Plaintext

Description: Bedrock Access Gateway - OpenAI-compatible RESTful APIs for Amazon Bedrock
Parameters:
ApiKeySecretArn:
Type: String
AllowedPattern: ^arn:aws:secretsmanager:.*$
Description: The secret ARN in Secrets Manager used to store the API Key
ContainerImageUri:
Type: String
Description: The ECR image URI for the Lambda function (e.g., 123456789012.dkr.ecr.us-east-1.amazonaws.com/bedrock-proxy-api:latest)
DefaultModelId:
Type: String
Default: anthropic.claude-3-sonnet-20240229-v1:0
Description: The default model ID, please make sure the model ID is supported in the current region
EnablePromptCaching:
Type: String
Default: "false"
AllowedValues:
- "true"
- "false"
Description: Enable prompt caching for supported models (Claude, Nova). When enabled, adds cachePoint to system prompts and messages for cost savings.
Resources:
VPCB9E5F0B4:
Type: AWS::EC2::VPC
Properties:
CidrBlock: 10.250.0.0/16
EnableDnsHostnames: true
EnableDnsSupport: true
InstanceTenancy: default
Tags:
- Key: Name
Value: BedrockProxy/VPC
VPCPublicSubnet1SubnetB4246D30:
Type: AWS::EC2::Subnet
Properties:
AvailabilityZone:
Fn::Select:
- 0
- Fn::GetAZs: ""
CidrBlock: 10.250.0.0/24
MapPublicIpOnLaunch: true
Tags:
- Key: aws-cdk:subnet-name
Value: Public
- Key: aws-cdk:subnet-type
Value: Public
- Key: Name
Value: BedrockProxy/VPC/PublicSubnet1
VpcId:
Ref: VPCB9E5F0B4
VPCPublicSubnet1RouteTableFEE4B781:
Type: AWS::EC2::RouteTable
Properties:
Tags:
- Key: Name
Value: BedrockProxy/VPC/PublicSubnet1
VpcId:
Ref: VPCB9E5F0B4
VPCPublicSubnet1RouteTableAssociation0B0896DC:
Type: AWS::EC2::SubnetRouteTableAssociation
Properties:
RouteTableId:
Ref: VPCPublicSubnet1RouteTableFEE4B781
SubnetId:
Ref: VPCPublicSubnet1SubnetB4246D30
VPCPublicSubnet1DefaultRoute91CEF279:
Type: AWS::EC2::Route
Properties:
DestinationCidrBlock: 0.0.0.0/0
GatewayId:
Ref: VPCIGWB7E252D3
RouteTableId:
Ref: VPCPublicSubnet1RouteTableFEE4B781
DependsOn:
- VPCVPCGW99B986DC
VPCPublicSubnet2Subnet74179F39:
Type: AWS::EC2::Subnet
Properties:
AvailabilityZone:
Fn::Select:
- 1
- Fn::GetAZs: ""
CidrBlock: 10.250.1.0/24
MapPublicIpOnLaunch: true
Tags:
- Key: aws-cdk:subnet-name
Value: Public
- Key: aws-cdk:subnet-type
Value: Public
- Key: Name
Value: BedrockProxy/VPC/PublicSubnet2
VpcId:
Ref: VPCB9E5F0B4
VPCPublicSubnet2RouteTable6F1A15F1:
Type: AWS::EC2::RouteTable
Properties:
Tags:
- Key: Name
Value: BedrockProxy/VPC/PublicSubnet2
VpcId:
Ref: VPCB9E5F0B4
VPCPublicSubnet2RouteTableAssociation5A808732:
Type: AWS::EC2::SubnetRouteTableAssociation
Properties:
RouteTableId:
Ref: VPCPublicSubnet2RouteTable6F1A15F1
SubnetId:
Ref: VPCPublicSubnet2Subnet74179F39
VPCPublicSubnet2DefaultRouteB7481BBA:
Type: AWS::EC2::Route
Properties:
DestinationCidrBlock: 0.0.0.0/0
GatewayId:
Ref: VPCIGWB7E252D3
RouteTableId:
Ref: VPCPublicSubnet2RouteTable6F1A15F1
DependsOn:
- VPCVPCGW99B986DC
VPCIGWB7E252D3:
Type: AWS::EC2::InternetGateway
Properties:
Tags:
- Key: Name
Value: BedrockProxy/VPC
VPCVPCGW99B986DC:
Type: AWS::EC2::VPCGatewayAttachment
Properties:
InternetGatewayId:
Ref: VPCIGWB7E252D3
VpcId:
Ref: VPCB9E5F0B4
ProxyApiHandlerServiceRoleBE71BFB1:
Type: AWS::IAM::Role
Properties:
AssumeRolePolicyDocument:
Statement:
- Action: sts:AssumeRole
Effect: Allow
Principal:
Service: lambda.amazonaws.com
Version: "2012-10-17"
ManagedPolicyArns:
- Fn::Join:
- ""
- - "arn:"
- Ref: AWS::Partition
- :iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
ProxyApiHandlerServiceRoleDefaultPolicy86681202:
Type: AWS::IAM::Policy
Properties:
PolicyDocument:
Statement:
- Action:
- bedrock:ListFoundationModels
- bedrock:ListInferenceProfiles
Effect: Allow
Resource: "*"
- Action:
- bedrock:InvokeModel
- bedrock:InvokeModelWithResponseStream
Effect: Allow
Resource:
- arn:aws:bedrock:*::foundation-model/*
- arn:aws:bedrock:*:*:inference-profile/*
- arn:aws:bedrock:*:*:application-inference-profile/*
- Action:
- secretsmanager:GetSecretValue
- secretsmanager:DescribeSecret
Effect: Allow
Resource:
Ref: ApiKeySecretArn
Version: "2012-10-17"
PolicyName: ProxyApiHandlerServiceRoleDefaultPolicy86681202
Roles:
- Ref: ProxyApiHandlerServiceRoleBE71BFB1
ProxyApiHandlerEC15A492:
Type: AWS::Lambda::Function
Properties:
Architectures:
- arm64
Code:
ImageUri:
Ref: ContainerImageUri
Description: Bedrock Proxy API Handler
Environment:
Variables:
DEBUG: "false"
API_KEY_SECRET_ARN:
Ref: ApiKeySecretArn
DEFAULT_MODEL:
Ref: DefaultModelId
DEFAULT_EMBEDDING_MODEL: cohere.embed-multilingual-v3
ENABLE_CROSS_REGION_INFERENCE: "true"
ENABLE_APPLICATION_INFERENCE_PROFILES: "true"
ENABLE_PROMPT_CACHING:
Ref: EnablePromptCaching
MemorySize: 1024
PackageType: Image
Role:
Fn::GetAtt:
- ProxyApiHandlerServiceRoleBE71BFB1
- Arn
Timeout: 600
DependsOn:
- ProxyApiHandlerServiceRoleDefaultPolicy86681202
- ProxyApiHandlerServiceRoleBE71BFB1
ProxyApiHandlerInvoke2UTWxhlfyqbT5FTn5jvgbLgjFfJwzswGk55DU1HYF6C33779:
Type: AWS::Lambda::Permission
Properties:
Action: lambda:InvokeFunction
FunctionName:
Fn::GetAtt:
- ProxyApiHandlerEC15A492
- Arn
Principal: elasticloadbalancing.amazonaws.com
ProxyALB87756780:
Type: AWS::ElasticLoadBalancingV2::LoadBalancer
Properties:
LoadBalancerAttributes:
- Key: deletion_protection.enabled
Value: "false"
Scheme: internet-facing
SecurityGroups:
- Fn::GetAtt:
- ProxyALBSecurityGroup0D6CA3DA
- GroupId
Subnets:
- Ref: VPCPublicSubnet1SubnetB4246D30
- Ref: VPCPublicSubnet2Subnet74179F39
Type: application
DependsOn:
- VPCPublicSubnet1DefaultRoute91CEF279
- VPCPublicSubnet1RouteTableAssociation0B0896DC
- VPCPublicSubnet2DefaultRouteB7481BBA
- VPCPublicSubnet2RouteTableAssociation5A808732
ProxyALBSecurityGroup0D6CA3DA:
Type: AWS::EC2::SecurityGroup
Properties:
GroupDescription: Automatically created Security Group for ELB BedrockProxyALB1CE4CAD1
SecurityGroupEgress:
- CidrIp: 255.255.255.255/32
Description: Disallow all traffic
FromPort: 252
IpProtocol: icmp
ToPort: 86
SecurityGroupIngress:
- CidrIp: 0.0.0.0/0
Description: Allow from anyone on port 80
FromPort: 80
IpProtocol: tcp
ToPort: 80
VpcId:
Ref: VPCB9E5F0B4
ProxyALBListener933E9515:
Type: AWS::ElasticLoadBalancingV2::Listener
Properties:
DefaultActions:
- TargetGroupArn:
Ref: ProxyALBListenerTargetsGroup187739FA
Type: forward
LoadBalancerArn:
Ref: ProxyALB87756780
Port: 80
Protocol: HTTP
ProxyALBListenerTargetsGroup187739FA:
Type: AWS::ElasticLoadBalancingV2::TargetGroup
Properties:
HealthCheckEnabled: false
TargetType: lambda
Targets:
- Id:
Fn::GetAtt:
- ProxyApiHandlerEC15A492
- Arn
DependsOn:
- ProxyApiHandlerInvoke2UTWxhlfyqbT5FTn5jvgbLgjFfJwzswGk55DU1HYF6C33779
Outputs:
APIBaseUrl:
Description: Proxy API Base URL (OPENAI_API_BASE)
Value:
Fn::Join:
- ""
- - http://
- Fn::GetAtt:
- ProxyALB87756780
- DNSName
- /api/v1