feat: add Amazon Nova 2 multimodal embeddings support (#222)
* feat: add Amazon Nova 2 multimodal embeddings support

  Adds support for `amazon.nova-2-multimodal-embeddings-v1:0` via the new `NovaEmbeddingsModel` class, using the `taskType`/`singleEmbeddingParams` request format documented in the Nova 2 user guide.

  - Supports single and batch text inputs
  - Respects the `dimensions` parameter (256/512/1024/2048/3072, default 3072)
  - Supports `float` and `base64` encoding formats
  - Includes `test_nova_embed.py` for quick end-to-end verification

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: remove test script from repo

  Test script moved to PR description instead.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: validate Nova embedding dimensions and fix falsy-zero bug

  - Add VALID_DIMENSIONS set and upfront validation with a clear error message
  - Fix `dimensions or DEFAULT` which would incorrectly ignore dimensions=0
  - Add inline comment explaining approximate token counting (Nova API does not return token counts in the response)

* fix: address PR review comments for NovaEmbeddingsModel

  - Fix VALID_DIMENSIONS to {256, 384, 1024, 3072} per Nova embeddings schema docs (previous values 512/2048 were mistakenly referenced from Titan embedding model docs)
  - Replace str(item) fallback with HTTPException(400) to avoid silent garbage embeddings
  - Update schema.py dimensions comment: 'not used' -> 'Used by Nova embeddings'
  - Replace getattr() with direct .dimensions access on Pydantic model
  - Move dimension validation before the loop (validates once, not per-text)
  - Add enumerate to batch loop; include input index in error detail
  - Switch isinstance(item, Iterable) to isinstance(item, list) for precise matching
  - Add comment explaining embeddingPurpose hardcoded to GENERIC_INDEX

---------

Co-authored-by: Gabriel <gabrielkoo@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -81,6 +81,7 @@ SUPPORTED_BEDROCK_EMBEDDING_MODELS = {
|
||||
"amazon.titan-embed-text-v2:0": "Titan Embeddings G2 - Text",
|
||||
# Disable Titan embedding.
|
||||
# "amazon.titan-embed-image-v1": "Titan Multimodal Embeddings G1"
|
||||
"amazon.nova-2-multimodal-embeddings-v1:0": "Nova Multimodal Embeddings V2",
|
||||
}
|
||||
|
||||
# Shared tiktoken encoding. The Nova embeddings model uses it to decode
# token-array inputs back into text and to approximate input token counts.
ENCODER = tiktoken.get_encoding("cl100k_base")
|
||||
@@ -1398,6 +1399,90 @@ class TitanEmbeddingsModel(BedrockEmbeddingsModel):
|
||||
)
|
||||
|
||||
|
||||
class NovaEmbeddingsModel(BedrockEmbeddingsModel):
    """Embeddings adapter for the Amazon Nova 2 multimodal embeddings model.

    Each input text is sent as its own ``SINGLE_EMBEDDING`` request through
    ``self._invoke_model``; the resulting vectors are combined into a single
    response with ``self._create_response``.
    """

    # Per https://docs.aws.amazon.com/nova/latest/userguide/embeddings-schema.html
    VALID_DIMENSIONS = {256, 384, 1024, 3072}
    DEFAULT_DIMENSION = 3072

    def _parse_args(self, text: str, dimensions: int | None = None) -> dict:
        """Build the Nova request body for a single text.

        Args:
            text: The text to embed.
            dimensions: Requested embedding size; ``None`` selects
                ``DEFAULT_DIMENSION``. No validation happens here.

        Returns:
            The request payload as a plain dict.
        """
        dim = dimensions if dimensions is not None else self.DEFAULT_DIMENSION
        return {
            "taskType": "SINGLE_EMBEDDING",
            "singleEmbeddingParams": {
                # Nova supports 9 embeddingPurpose values; GENERIC_INDEX is the
                # general-purpose default suitable for most retrieval use cases.
                "embeddingPurpose": "GENERIC_INDEX",
                "embeddingDimension": dim,
                "text": {
                    "truncationMode": "END",
                    "value": text,
                },
            },
        }

    @staticmethod
    def _collect_texts(raw_input) -> list[str]:
        """Normalize an OpenAI-style ``input`` value into a list of texts.

        Accepted shapes: a single string, an iterable of strings, a flat
        iterable of token ids (ONE input), or an iterable of token-id
        iterables (one input each).

        Raises:
            HTTPException: 400 for an empty input, an unsupported input
                type, or an unsupported item type.
        """
        # Function-scope import keeps this block self-contained.
        from collections.abc import Iterable

        if isinstance(raw_input, str):
            return [raw_input]
        # The request schema declares token inputs as ``Iterable``, so the
        # value is not guaranteed to be a concrete ``list``.
        if not isinstance(raw_input, Iterable):
            raise HTTPException(status_code=400, detail="Unsupported input type")

        texts: list[str] = []
        # A run of bare ints is one token array, i.e. one text to embed,
        # not one text per token.
        pending_tokens: list[int] = []

        def flush_tokens() -> None:
            if pending_tokens:
                texts.append(ENCODER.decode(pending_tokens))
                pending_tokens.clear()

        for item in raw_input:
            if isinstance(item, int):
                pending_tokens.append(item)
                continue
            flush_tokens()
            if isinstance(item, str):
                texts.append(item)
            elif isinstance(item, Iterable):
                # Decode a nested token array back into text.
                texts.append(ENCODER.decode(list(item)))
            else:
                raise HTTPException(
                    status_code=400,
                    detail=f"Unsupported input item type: {type(item).__name__}. Expected str, int, or list of ints.",
                )
        flush_tokens()

        # Checked after iteration because a generic iterable has no len().
        if not texts:
            raise HTTPException(status_code=400, detail="Input list cannot be empty")
        return texts

    def embed(self, embeddings_request: EmbeddingsRequest) -> EmbeddingsResponse:
        """Embed every input in the request and return the combined response.

        Raises:
            HTTPException: 400 for invalid input or an unsupported
                ``dimensions`` value; 500 when the model response contains
                no embedding for an input.
        """
        texts = self._collect_texts(embeddings_request.input)

        # Validate dimensions once before the loop — it's constant across all texts
        requested = embeddings_request.dimensions
        dim = requested if requested is not None else self.DEFAULT_DIMENSION
        if dim not in self.VALID_DIMENSIONS:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid dimensions {dim}. Must be one of {sorted(self.VALID_DIMENSIONS)}",
            )

        all_embeddings = []
        total_tokens = 0

        for idx, text in enumerate(texts):
            response = self._invoke_model(
                # Pass the already-validated value rather than the raw field.
                args=self._parse_args(text, dim),
                model_id=embeddings_request.model,
            )
            response_body = json.loads(response.get("body").read())
            if DEBUG:
                logger.info("Bedrock response body keys: " + str(list(response_body.keys())))

            # Response: {"embeddings": [{"embeddingType": "TEXT", "embedding": [...]}]}
            embeddings_list = response_body.get("embeddings") or []
            embedding = embeddings_list[0].get("embedding") if embeddings_list else None
            if not embedding:
                # Covers both an empty "embeddings" list and an entry that
                # lacks the "embedding" vector.
                raise HTTPException(
                    status_code=500,
                    detail=f"No embeddings returned from Nova model for input[{idx}]",
                )
            all_embeddings.append(embedding)
            # Nova doesn't return token counts in the response; approximate with cl100k_base
            total_tokens += len(ENCODER.encode(text))

        return self._create_response(
            embeddings=all_embeddings,
            model=embeddings_request.model,
            input_tokens=total_tokens,
            encoding_format=embeddings_request.encoding_format,
        )
|
||||
|
||||
|
||||
def get_embeddings_model(model_id: str) -> BedrockEmbeddingsModel:
|
||||
model_name = SUPPORTED_BEDROCK_EMBEDDING_MODELS.get(model_id, "")
|
||||
if DEBUG:
|
||||
@@ -1407,6 +1492,8 @@ def get_embeddings_model(model_id: str) -> BedrockEmbeddingsModel:
|
||||
return CohereEmbeddingsModel()
|
||||
case "Titan Embeddings G2 - Text":
|
||||
return TitanEmbeddingsModel()
|
||||
case "Nova Multimodal Embeddings V2":
|
||||
return NovaEmbeddingsModel()
|
||||
case _:
|
||||
logger.error("Unsupported model id " + model_id)
|
||||
raise HTTPException(
|
||||
|
||||
@@ -182,7 +182,7 @@ class EmbeddingsRequest(BaseModel):
|
||||
input: str | list[str] | Iterable[int | Iterable[int]]
|
||||
model: str
|
||||
encoding_format: Literal["float", "base64"] = "float"
|
||||
dimensions: int | None = None # not used.
|
||||
dimensions: int | None = None # Used by Nova embeddings; ignored by other models.
|
||||
user: str | None = None # not used.
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user