feat: add Amazon Nova 2 multimodal embeddings support (#222)

* feat: add Amazon Nova 2 multimodal embeddings support

  Adds support for `amazon.nova-2-multimodal-embeddings-v1:0` via the new `NovaEmbeddingsModel` class, using the `taskType`/`singleEmbeddingParams` request format documented in the Nova 2 user guide.

  - Supports single and batch text inputs
  - Respects the `dimensions` parameter (256/512/1024/2048/3072, default 3072)
  - Supports `float` and `base64` encoding formats
  - Includes `test_nova_embed.py` for quick end-to-end verification

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: remove test script from repo

  Test script moved to PR description instead.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: validate Nova embedding dimensions and fix falsy-zero bug

  - Add VALID_DIMENSIONS set and upfront validation with a clear error message
  - Fix `dimensions or DEFAULT` which would incorrectly ignore dimensions=0
  - Add inline comment explaining approximate token counting (Nova API does not return token counts in the response)

* fix: address PR review comments for NovaEmbeddingsModel

  - Fix VALID_DIMENSIONS to {256, 384, 1024, 3072} per Nova embeddings schema docs (previous values 512/2048 were mistakenly referenced from Titan embedding model docs)
  - Replace str(item) fallback with HTTPException(400) to avoid silent garbage embeddings
  - Update schema.py dimensions comment: 'not used' -> 'Used by Nova embeddings'
  - Replace getattr() with direct .dimensions access on Pydantic model
  - Move dimension validation before the loop (validates once, not per-text)
  - Add enumerate to batch loop; include input index in error detail
  - Switch isinstance(item, Iterable) to isinstance(item, list) for precise matching
  - Add comment explaining embeddingPurpose hardcoded to GENERIC_INDEX

---------

Co-authored-by: Gabriel <gabrielkoo@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 11:41:17 +08:00
parent a1844f95d4
commit d14596ff47
2 changed files with 88 additions and 1 deletions
--- a/src/api/models/bedrock.py
+++ b/src/api/models/bedrock.py
@@ -81,6 +81,7 @@ SUPPORTED_BEDROCK_EMBEDDING_MODELS = {
    "amazon.titan-embed-text-v2:0": "Titan Embeddings G2 - Text",
    # Disable Titan embedding.
    # "amazon.titan-embed-image-v1": "Titan Multimodal Embeddings G1"
+    "amazon.nova-2-multimodal-embeddings-v1:0": "Nova Multimodal Embeddings V2",
 }

 ENCODER = tiktoken.get_encoding("cl100k_base")
@@ -1398,6 +1399,90 @@ class TitanEmbeddingsModel(BedrockEmbeddingsModel):
        )


+class NovaEmbeddingsModel(BedrockEmbeddingsModel):
+    # Per https://docs.aws.amazon.com/nova/latest/userguide/embeddings-schema.html
+    VALID_DIMENSIONS = {256, 384, 1024, 3072}
+    DEFAULT_DIMENSION = 3072
+
+    def _parse_args(self, text: str, dimensions: int | None = None) -> dict:
+        dim = dimensions if dimensions is not None else self.DEFAULT_DIMENSION
+        return {
+            "taskType": "SINGLE_EMBEDDING",
+            "singleEmbeddingParams": {
+                # Nova supports 9 embeddingPurpose values; GENERIC_INDEX is the
+                # general-purpose default suitable for most retrieval use cases.
+                "embeddingPurpose": "GENERIC_INDEX",
+                "embeddingDimension": dim,
+                "text": {
+                    "truncationMode": "END",
+                    "value": text,
+                },
+            },
+        }
+
+    def embed(self, embeddings_request: EmbeddingsRequest) -> EmbeddingsResponse:
+        if isinstance(embeddings_request.input, str):
+            texts = [embeddings_request.input]
+        elif isinstance(embeddings_request.input, list):
+            if len(embeddings_request.input) == 0:
+                raise HTTPException(status_code=400, detail="Input list cannot be empty")
+            # Decode token arrays if needed
+            texts = []
+            for item in embeddings_request.input:
+                if isinstance(item, str):
+                    texts.append(item)
+                elif isinstance(item, int):
+                    texts.append(ENCODER.decode([item]))
+                elif isinstance(item, list):
+                    texts.append(ENCODER.decode(item))
+                else:
+                    raise HTTPException(
+                        status_code=400,
+                        detail=f"Unsupported input item type: {type(item).__name__}. Expected str, int, or list of ints.",
+                    )
+        else:
+            raise HTTPException(status_code=400, detail="Unsupported input type")
+
+        dimensions = embeddings_request.dimensions
+        # Validate dimensions once before the loop — it's constant across all texts
+        dim = dimensions if dimensions is not None else self.DEFAULT_DIMENSION
+        if dim not in self.VALID_DIMENSIONS:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Invalid dimensions {dim}. Must be one of {sorted(self.VALID_DIMENSIONS)}",
+            )
+
+        all_embeddings = []
+        total_tokens = 0
+
+        for idx, text in enumerate(texts):
+            response = self._invoke_model(
+                args=self._parse_args(text, dimensions),
+                model_id=embeddings_request.model,
+            )
+            response_body = json.loads(response.get("body").read())
+            if DEBUG:
+                logger.info("Bedrock response body keys: " + str(list(response_body.keys())))
+
+            # Response: {"embeddings": [{"embeddingType": "TEXT", "embedding": [...]}]}
+            embeddings_list = response_body.get("embeddings", [])
+            if not embeddings_list:
+                raise HTTPException(
+                    status_code=500,
+                    detail=f"No embeddings returned from Nova model for input[{idx}]",
+                )
+            all_embeddings.append(embeddings_list[0]["embedding"])
+            # Nova doesn't return token counts in the response; approximate with cl100k_base
+            total_tokens += len(ENCODER.encode(text))
+
+        return self._create_response(
+            embeddings=all_embeddings,
+            model=embeddings_request.model,
+            input_tokens=total_tokens,
+            encoding_format=embeddings_request.encoding_format,
+        )
+
+
 def get_embeddings_model(model_id: str) -> BedrockEmbeddingsModel:
    model_name = SUPPORTED_BEDROCK_EMBEDDING_MODELS.get(model_id, "")
    if DEBUG:
@@ -1407,6 +1492,8 @@ def get_embeddings_model(model_id: str) -> BedrockEmbeddingsModel:
            return CohereEmbeddingsModel()
        case "Titan Embeddings G2 - Text":
            return TitanEmbeddingsModel()
+        case "Nova Multimodal Embeddings V2":
+            return NovaEmbeddingsModel()
        case _:
            logger.error("Unsupported model id " + model_id)
            raise HTTPException(
--- a/src/api/schema.py
+++ b/src/api/schema.py
@@ -182,7 +182,7 @@ class EmbeddingsRequest(BaseModel):
    input: str | list[str] | Iterable[int | Iterable[int]]
    model: str
    encoding_format: Literal["float", "base64"] = "float"
-    dimensions: int | None = None  # not used.
+    dimensions: int | None = None  # Used by Nova embeddings; ignored by other models.
    user: str | None = None  # not used.