feat: add Amazon Nova 2 multimodal embeddings support (#222)

* feat: add Amazon Nova 2 multimodal embeddings support

Adds support for `amazon.nova-2-multimodal-embeddings-v1:0` via the
new `NovaEmbeddingsModel` class, using the `taskType`/`singleEmbeddingParams`
request format documented in the Nova 2 user guide.

- Supports single and batch text inputs
- Respects the `dimensions` parameter (256/512/1024/2048/3072, default 3072)
- Supports `float` and `base64` encoding formats
- Includes `test_nova_embed.py` for quick end-to-end verification

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: remove test script from repo

Test script moved to PR description instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: validate Nova embedding dimensions and fix falsy-zero bug

- Add VALID_DIMENSIONS set and upfront validation with a clear error message
- Fix `dimensions or DEFAULT` which would incorrectly ignore dimensions=0
- Add inline comment explaining approximate token counting (Nova API
  does not return token counts in the response)

* fix: address PR review comments for NovaEmbeddingsModel

- Fix VALID_DIMENSIONS to {256, 384, 1024, 3072} per Nova embeddings schema docs
  (previous values 512/2048 were mistakenly referenced from Titan embedding model docs)
- Replace str(item) fallback with HTTPException(400) to avoid silent garbage embeddings
- Update schema.py dimensions comment: 'not used' -> 'Used by Nova embeddings'
- Replace getattr() with direct .dimensions access on Pydantic model
- Move dimension validation before the loop (validates once, not per-text)
- Add enumerate to batch loop; include input index in error detail
- Switch isinstance(item, Iterable) to isinstance(item, list) for precise matching
- Add comment explaining embeddingPurpose hardcoded to GENERIC_INDEX

---------

Co-authored-by: Gabriel <gabrielkoo@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Gabriel Koo
2026-02-26 11:41:17 +08:00
committed by GitHub
parent a1844f95d4
commit d14596ff47
2 changed files with 88 additions and 1 deletions

View File

@@ -81,6 +81,7 @@ SUPPORTED_BEDROCK_EMBEDDING_MODELS = {
"amazon.titan-embed-text-v2:0": "Titan Embeddings G2 - Text",
# Disable Titan embedding.
# "amazon.titan-embed-image-v1": "Titan Multimodal Embeddings G1"
"amazon.nova-2-multimodal-embeddings-v1:0": "Nova Multimodal Embeddings V2",
}
ENCODER = tiktoken.get_encoding("cl100k_base")
@@ -1398,6 +1399,90 @@ class TitanEmbeddingsModel(BedrockEmbeddingsModel):
)
class NovaEmbeddingsModel(BedrockEmbeddingsModel):
    """Embeddings adapter for amazon.nova-2-multimodal-embeddings-v1:0.

    Uses the Nova SINGLE_EMBEDDING task type; each input text is sent as a
    separate InvokeModel call because the single-embedding API accepts one
    text per request.
    """

    # Per https://docs.aws.amazon.com/nova/latest/userguide/embeddings-schema.html
    VALID_DIMENSIONS = {256, 384, 1024, 3072}
    DEFAULT_DIMENSION = 3072

    def _parse_args(self, text: str, dimensions: int | None = None) -> dict:
        """Build the Nova InvokeModel request body for a single text.

        Args:
            text: The text to embed.
            dimensions: Output embedding size; falls back to
                DEFAULT_DIMENSION when None. Callers are expected to have
                validated the value against VALID_DIMENSIONS already.

        Returns:
            The request payload dict for InvokeModel.
        """
        dim = dimensions if dimensions is not None else self.DEFAULT_DIMENSION
        return {
            "taskType": "SINGLE_EMBEDDING",
            "singleEmbeddingParams": {
                # Nova supports 9 embeddingPurpose values; GENERIC_INDEX is the
                # general-purpose default suitable for most retrieval use cases.
                "embeddingPurpose": "GENERIC_INDEX",
                "embeddingDimension": dim,
                "text": {
                    "truncationMode": "END",
                    "value": text,
                },
            },
        }

    @staticmethod
    def _texts_from_input(raw) -> list[str]:
        """Normalize the OpenAI-style request input into a list of strings.

        Accepts a single string, or a list whose items are strings, single
        token ids (int), or token-id arrays (list[int]); token ids are decoded
        with the module-level cl100k_base ENCODER.

        Raises:
            HTTPException(400): on an empty list or an unsupported item type.
        """
        if isinstance(raw, str):
            return [raw]
        if isinstance(raw, list):
            if len(raw) == 0:
                raise HTTPException(status_code=400, detail="Input list cannot be empty")
            texts: list[str] = []
            for item in raw:
                if isinstance(item, str):
                    texts.append(item)
                elif isinstance(item, int):
                    texts.append(ENCODER.decode([item]))
                elif isinstance(item, list):
                    texts.append(ENCODER.decode(item))
                else:
                    raise HTTPException(
                        status_code=400,
                        detail=f"Unsupported input item type: {type(item).__name__}. Expected str, int, or list of ints.",
                    )
            return texts
        raise HTTPException(status_code=400, detail="Unsupported input type")

    def embed(self, embeddings_request: EmbeddingsRequest) -> EmbeddingsResponse:
        """Embed one or more texts with the Nova model.

        Invokes the model once per text, collects each embedding vector, and
        returns them in input order.

        Raises:
            HTTPException(400): on invalid input or dimensions.
            HTTPException(500): if the model returns no embeddings for a text.
        """
        texts = self._texts_from_input(embeddings_request.input)

        # Validate/resolve dimensions once before the loop — it is constant
        # across all texts.
        dimensions = embeddings_request.dimensions
        dim = dimensions if dimensions is not None else self.DEFAULT_DIMENSION
        if dim not in self.VALID_DIMENSIONS:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid dimensions {dim}. Must be one of {sorted(self.VALID_DIMENSIONS)}",
            )

        all_embeddings = []
        total_tokens = 0
        for idx, text in enumerate(texts):
            response = self._invoke_model(
                # Pass the resolved `dim` so defaulting happens in exactly one
                # place (previously the raw `dimensions` was re-defaulted
                # inside _parse_args, duplicating the logic above).
                args=self._parse_args(text, dim),
                model_id=embeddings_request.model,
            )
            response_body = json.loads(response.get("body").read())
            if DEBUG:
                logger.info("Bedrock response body keys: " + str(list(response_body.keys())))
            # Response: {"embeddings": [{"embeddingType": "TEXT", "embedding": [...]}]}
            embeddings_list = response_body.get("embeddings", [])
            if not embeddings_list:
                raise HTTPException(
                    status_code=500,
                    detail=f"No embeddings returned from Nova model for input[{idx}]",
                )
            all_embeddings.append(embeddings_list[0]["embedding"])
            # Nova doesn't return token counts in the response; approximate with cl100k_base
            total_tokens += len(ENCODER.encode(text))

        return self._create_response(
            embeddings=all_embeddings,
            model=embeddings_request.model,
            input_tokens=total_tokens,
            encoding_format=embeddings_request.encoding_format,
        )
def get_embeddings_model(model_id: str) -> BedrockEmbeddingsModel:
model_name = SUPPORTED_BEDROCK_EMBEDDING_MODELS.get(model_id, "")
if DEBUG:
@@ -1407,6 +1492,8 @@ def get_embeddings_model(model_id: str) -> BedrockEmbeddingsModel:
return CohereEmbeddingsModel()
case "Titan Embeddings G2 - Text":
return TitanEmbeddingsModel()
case "Nova Multimodal Embeddings V2":
return NovaEmbeddingsModel()
case _:
logger.error("Unsupported model id " + model_id)
raise HTTPException(

View File

@@ -182,7 +182,7 @@ class EmbeddingsRequest(BaseModel):
input: str | list[str] | Iterable[int | Iterable[int]]
model: str
encoding_format: Literal["float", "base64"] = "float"
dimensions: int | None = None # not used.
dimensions: int | None = None # Used by Nova embeddings; ignored by other models.
user: str | None = None # not used.