From 1d4d263d577140afe8f38ce47fc5ada67752f14e Mon Sep 17 00:00:00 2001
From: Jiayi <jiayin@nvidia.com>
Date: Mon, 13 Oct 2025 21:39:01 -0700
Subject: [PATCH 1/2] fix: pass input_type via extra_body for asymmetric
 embedding models in the NVIDIA inference provider

---
 .../remote/inference/nvidia/NVIDIA.md         |  13 +-
 .../remote/inference/nvidia/nvidia.py         |  55 ----
 .../inference/test_openai_embeddings.py       | 303 ++++++++++++++----
 3 files changed, 243 insertions(+), 128 deletions(-)

diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
index 625be6088e..096ff28ac7 100644
--- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
+++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
@@ -139,16 +139,13 @@ print(f"Structured Response: {structured_response.choices[0].message.content}")
 
 The following example shows how to create embeddings for an NVIDIA NIM.
 
-> [!NOTE]
-> NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`.
-
 ```python
-response = client.inference.embeddings(
-    model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
-    contents=["What is the capital of France?"],
-    task_type="query",
+response = client.embeddings.create(
+    model="nvidia/llama-3.2-nv-embedqa-1b-v2",
+    input=["What is the capital of France?"],
+    extra_body={"input_type": "query"},
 )
-print(f"Embeddings: {response.embeddings}")
+print(f"Embeddings: {response.data}")
 ```
 
 ### Vision Language Models Example
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py
index 37864b0408..eab665d631 100644
--- a/llama_stack/providers/remote/inference/nvidia/nvidia.py
+++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py
@@ -5,14 +5,6 @@
 # the root directory of this source tree.
 
 
-from openai import NOT_GIVEN
-
-from llama_stack.apis.inference import (
-    OpenAIEmbeddingData,
-    OpenAIEmbeddingsRequestWithExtraBody,
-    OpenAIEmbeddingsResponse,
-    OpenAIEmbeddingUsage,
-)
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 
@@ -76,50 +68,3 @@ def get_base_url(self) -> str:
         :return: The NVIDIA API base URL
         """
         return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
-
-    async def openai_embeddings(
-        self,
-        params: OpenAIEmbeddingsRequestWithExtraBody,
-    ) -> OpenAIEmbeddingsResponse:
-        """
-        OpenAI-compatible embeddings for NVIDIA NIM.
-
-        Note: NVIDIA NIM asymmetric embedding models require an "input_type" field not present in the standard OpenAI embeddings API.
-        We default this to "query" to ensure requests succeed when using the
-        OpenAI-compatible endpoint. For passage embeddings, use the embeddings API with
-        `task_type='document'`.
-        """
-        extra_body: dict[str, object] = {"input_type": "query"}
-        logger.warning(
-            "NVIDIA OpenAI-compatible embeddings: defaulting to input_type='query'. "
-            "For passage embeddings, use the embeddings API with task_type='document'."
-        )
-
-        response = await self.client.embeddings.create(
-            model=await self._get_provider_model_id(params.model),
-            input=params.input,
-            encoding_format=params.encoding_format if params.encoding_format is not None else NOT_GIVEN,
-            dimensions=params.dimensions if params.dimensions is not None else NOT_GIVEN,
-            user=params.user if params.user is not None else NOT_GIVEN,
-            extra_body=extra_body,
-        )
-
-        data = []
-        for i, embedding_data in enumerate(response.data):
-            data.append(
-                OpenAIEmbeddingData(
-                    embedding=embedding_data.embedding,
-                    index=i,
-                )
-            )
-
-        usage = OpenAIEmbeddingUsage(
-            prompt_tokens=response.usage.prompt_tokens,
-            total_tokens=response.usage.total_tokens,
-        )
-
-        return OpenAIEmbeddingsResponse(
-            data=data,
-            model=response.model,
-            usage=usage,
-        )
diff --git a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py
index fc2f66b9c1..afe9f31f12 100644
--- a/tests/integration/inference/test_openai_embeddings.py
+++ b/tests/integration/inference/test_openai_embeddings.py
@@ -21,6 +21,16 @@ def decode_base64_to_floats(base64_string: str) -> list[float]:
     return list(embedding_floats)
 
 
+ASYMMETRIC_EMBEDDING_MODELS_BY_PROVIDER = {
+    "remote::nvidia": [
+        "nvidia/llama-3.2-nv-embedqa-1b-v2",
+        "nvidia/nv-embedqa-e5-v5",
+        "nvidia/nv-embedqa-mistral-7b-v2",
+        "snowflake/arctic-embed-l",
+    ],
+}
+
+
 def provider_from_model(client_with_models, model_id):
     models = {m.identifier: m for m in client_with_models.models.list()}
     models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
@@ -29,6 +39,25 @@ def provider_from_model(client_with_models, model_id):
     return providers[provider_id]
 
 
+def is_asymmetric_model(client_with_models, model_id):
+    provider = provider_from_model(client_with_models, model_id)
+    provider_type = provider.provider_type
+
+    if provider_type not in ASYMMETRIC_EMBEDDING_MODELS_BY_PROVIDER:
+        return False
+
+    return model_id in ASYMMETRIC_EMBEDDING_MODELS_BY_PROVIDER[provider_type]
+
+
+def get_extra_body_for_model(client_with_models, model_id, input_type="query"):
+    provider = provider_from_model(client_with_models, model_id)
+
+    if provider.provider_type == "remote::nvidia":
+        return {"input_type": input_type}
+
+    return None
+
+
 def skip_if_model_doesnt_support_user_param(client, model_id):
     provider = provider_from_model(client, model_id)
     if provider.provider_type in (
@@ -40,17 +69,29 @@ def skip_if_model_doesnt_support_user_param(client, model_id):
 
 def skip_if_model_doesnt_support_encoding_format_base64(client, model_id):
     provider = provider_from_model(client, model_id)
-    if provider.provider_type in (
+
+    should_skip = provider.provider_type in (
         "remote::databricks",  # param silently ignored, always returns floats
         "remote::fireworks",  # param silently ignored, always returns list of floats
         "remote::ollama",  # param silently ignored, always returns list of floats
-    ):
+    ) or (
+        provider.provider_type == "remote::nvidia"
+        and model_id
+        in [
+            "nvidia/nv-embedqa-e5-v5",
+            "nvidia/nv-embedqa-mistral-7b-v2",
+            "snowflake/arctic-embed-l",
+        ]
+    )
+
+    if should_skip:
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} does not support encoding_format='base64'.")
 
 
 def skip_if_model_doesnt_support_variable_dimensions(client_with_models, model_id):
     provider = provider_from_model(client_with_models, model_id)
-    if (
+
+    should_skip = (
         provider.provider_type
         in (
             "remote::together",  # returns 400
@@ -59,11 +100,19 @@ def skip_if_model_doesnt_support_variable_dimensions(client_with_models, model_i
             "remote::databricks",
             "remote::watsonx",  # openai.BadRequestError: Error code: 400 - {'detail': "litellm.UnsupportedParamsError: watsonx does not support parameters: {'dimensions': 384}
         )
-    ):
-        pytest.skip(
-            f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions."
+        or (provider.provider_type == "remote::openai" and "text-embedding-3" not in model_id)
+        or (
+            provider.provider_type == "remote::nvidia"
+            and model_id
+            in [
+                "nvidia/nv-embedqa-e5-v5",
+                "nvidia/nv-embedqa-mistral-7b-v2",
+                "snowflake/arctic-embed-l",
+            ]
         )
-    if provider.provider_type == "remote::openai" and "text-embedding-3" not in model_id:
+    )
+
+    if should_skip:
         pytest.skip(
             f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions."
         )
@@ -100,12 +149,27 @@ def test_openai_embeddings_single_string(compat_client, client_with_models, embe
     skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
 
     input_text = "Hello, world!"
-
-    response = compat_client.embeddings.create(
-        model=embedding_model_id,
-        input=input_text,
-        encoding_format="float",
-    )
+    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+
+    # For asymmetric models, verify that calling without extra_body raises an error
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs_without_extra = {
+            "model": embedding_model_id,
+            "input": input_text,
+            "encoding_format": "float",
+        }
+        with pytest.raises(Exception):  # noqa: B017
+            compat_client.embeddings.create(**kwargs_without_extra)
+
+    kwargs = {
+        "model": embedding_model_id,
+        "input": input_text,
+        "encoding_format": "float",
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs["extra_body"] = extra_body
+
+    response = compat_client.embeddings.create(**kwargs)
 
     assert response.object == "list"
 
@@ -124,12 +188,26 @@ def test_openai_embeddings_multiple_strings(compat_client, client_with_models, e
     skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
 
     input_texts = ["Hello, world!", "How are you today?", "This is a test."]
-
-    response = compat_client.embeddings.create(
-        model=embedding_model_id,
-        input=input_texts,
-        encoding_format="float",
-    )
+    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs_without_extra = {
+            "model": embedding_model_id,
+            "input": input_texts,
+            "encoding_format": "float",
+        }
+        with pytest.raises(Exception):  # noqa: B017
+            compat_client.embeddings.create(**kwargs_without_extra)
+
+    kwargs = {
+        "model": embedding_model_id,
+        "input": input_texts,
+        "encoding_format": "float",
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs["extra_body"] = extra_body
+
+    response = compat_client.embeddings.create(**kwargs)
 
     assert response.object == "list"
 
@@ -150,12 +228,26 @@ def test_openai_embeddings_with_encoding_format_float(compat_client, client_with
     skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
 
     input_text = "Test encoding format"
-
-    response = compat_client.embeddings.create(
-        model=embedding_model_id,
-        input=input_text,
-        encoding_format="float",
-    )
+    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs_without_extra = {
+            "model": embedding_model_id,
+            "input": input_text,
+            "encoding_format": "float",
+        }
+        with pytest.raises(Exception):  # noqa: B017
+            compat_client.embeddings.create(**kwargs_without_extra)
+
+    kwargs = {
+        "model": embedding_model_id,
+        "input": input_text,
+        "encoding_format": "float",
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs["extra_body"] = extra_body
+
+    response = compat_client.embeddings.create(**kwargs)
 
     assert response.object == "list"
     assert len(response.data) == 1
@@ -170,12 +262,26 @@ def test_openai_embeddings_with_dimensions(compat_client, client_with_models, em
 
     input_text = "Test dimensions parameter"
     dimensions = 16
-
-    response = compat_client.embeddings.create(
-        model=embedding_model_id,
-        input=input_text,
-        dimensions=dimensions,
-    )
+    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs_without_extra = {
+            "model": embedding_model_id,
+            "input": input_text,
+            "dimensions": dimensions,
+        }
+        with pytest.raises(Exception):  # noqa: B017
+            compat_client.embeddings.create(**kwargs_without_extra)
+
+    kwargs = {
+        "model": embedding_model_id,
+        "input": input_text,
+        "dimensions": dimensions,
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs["extra_body"] = extra_body
+
+    response = compat_client.embeddings.create(**kwargs)
 
     assert response.object == "list"
     assert len(response.data) == 1
@@ -191,12 +297,26 @@ def test_openai_embeddings_with_user_parameter(compat_client, client_with_models
 
     input_text = "Test user parameter"
     user_id = "test-user-123"
-
-    response = compat_client.embeddings.create(
-        model=embedding_model_id,
-        input=input_text,
-        user=user_id,
-    )
+    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs_without_extra = {
+            "model": embedding_model_id,
+            "input": input_text,
+            "user": user_id,
+        }
+        with pytest.raises(Exception):  # noqa: B017
+            compat_client.embeddings.create(**kwargs_without_extra)
+
+    kwargs = {
+        "model": embedding_model_id,
+        "input": input_text,
+        "user": user_id,
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs["extra_body"] = extra_body
+
+    response = compat_client.embeddings.create(**kwargs)
 
     assert response.object == "list"
     assert len(response.data) == 1
@@ -208,11 +328,17 @@ def test_openai_embeddings_empty_list_error(compat_client, client_with_models, e
     """Test that empty list input raises an appropriate error."""
     skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
 
+    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+
+    kwargs = {
+        "model": embedding_model_id,
+        "input": [],
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs["extra_body"] = extra_body
+
     with pytest.raises(Exception):  # noqa: B017
-        compat_client.embeddings.create(
-            model=embedding_model_id,
-            input=[],
-        )
+        compat_client.embeddings.create(**kwargs)
 
 
 def test_openai_embeddings_invalid_model_error(compat_client, client_with_models, embedding_model_id):
@@ -232,18 +358,35 @@ def test_openai_embeddings_different_inputs_different_outputs(compat_client, cli
 
     input_text1 = "This is the first text"
     input_text2 = "This is completely different content"
-
-    response1 = compat_client.embeddings.create(
-        model=embedding_model_id,
-        input=input_text1,
-        encoding_format="float",
-    )
-
-    response2 = compat_client.embeddings.create(
-        model=embedding_model_id,
-        input=input_text2,
-        encoding_format="float",
-    )
+    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs_without_extra = {
+            "model": embedding_model_id,
+            "input": input_text1,
+            "encoding_format": "float",
+        }
+        with pytest.raises(Exception):  # noqa: B017
+            compat_client.embeddings.create(**kwargs_without_extra)
+
+    kwargs1 = {
+        "model": embedding_model_id,
+        "input": input_text1,
+        "encoding_format": "float",
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs1["extra_body"] = extra_body
+
+    kwargs2 = {
+        "model": embedding_model_id,
+        "input": input_text2,
+        "encoding_format": "float",
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs2["extra_body"] = extra_body
+
+    response1 = compat_client.embeddings.create(**kwargs1)
+    response2 = compat_client.embeddings.create(**kwargs2)
 
     embedding1 = response1.data[0].embedding
     embedding2 = response2.data[0].embedding
@@ -261,13 +404,28 @@ def test_openai_embeddings_with_encoding_format_base64(compat_client, client_wit
 
     input_text = "Test base64 encoding format"
     dimensions = 12
-
-    response = compat_client.embeddings.create(
-        model=embedding_model_id,
-        input=input_text,
-        encoding_format="base64",
-        dimensions=dimensions,
-    )
+    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs_without_extra = {
+            "model": embedding_model_id,
+            "input": input_text,
+            "encoding_format": "base64",
+            "dimensions": dimensions,
+        }
+        with pytest.raises(Exception):  # noqa: B017
+            compat_client.embeddings.create(**kwargs_without_extra)
+
+    kwargs = {
+        "model": embedding_model_id,
+        "input": input_text,
+        "encoding_format": "base64",
+        "dimensions": dimensions,
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs["extra_body"] = extra_body
+
+    response = compat_client.embeddings.create(**kwargs)
 
     # Validate response structure
     assert response.object == "list"
@@ -293,12 +451,27 @@ def test_openai_embeddings_base64_batch_processing(compat_client, client_with_mo
     skip_if_model_doesnt_support_encoding_format_base64(client_with_models, embedding_model_id)
 
     input_texts = ["First text for base64", "Second text for base64", "Third text for base64"]
+    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs_without_extra = {
+            "model": embedding_model_id,
+            "input": input_texts,
+            "encoding_format": "base64",
+        }
+        with pytest.raises(Exception):  # noqa: B017
+            compat_client.embeddings.create(**kwargs_without_extra)
+
+    kwargs = {
+        "model": embedding_model_id,
+        "input": input_texts,
+        "encoding_format": "base64",
+    }
+    if is_asymmetric_model(client_with_models, embedding_model_id):
+        kwargs["extra_body"] = extra_body
+
+    response = compat_client.embeddings.create(**kwargs)
 
-    response = compat_client.embeddings.create(
-        model=embedding_model_id,
-        input=input_texts,
-        encoding_format="base64",
-    )
     # Validate response structure
     assert response.object == "list"
 

From 33190c13f81bd26c5b99cd2f889751c0af2bdade Mon Sep 17 00:00:00 2001
From: Jiayi <jiayin@nvidia.com>
Date: Tue, 14 Oct 2025 12:57:17 -0700
Subject: [PATCH 2/2] Refactor test_openai_embeddings

---
 .../inference/test_openai_embeddings.py       | 256 +++++-------------
 1 file changed, 73 insertions(+), 183 deletions(-)

diff --git a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py
index afe9f31f12..00de56f3a0 100644
--- a/tests/integration/inference/test_openai_embeddings.py
+++ b/tests/integration/inference/test_openai_embeddings.py
@@ -12,15 +12,6 @@
 
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
 
-
-def decode_base64_to_floats(base64_string: str) -> list[float]:
-    """Helper function to decode base64 string to list of float32 values."""
-    embedding_bytes = base64.b64decode(base64_string)
-    float_count = len(embedding_bytes) // 4  # 4 bytes per float32
-    embedding_floats = struct.unpack(f"{float_count}f", embedding_bytes)
-    return list(embedding_floats)
-
-
 ASYMMETRIC_EMBEDDING_MODELS_BY_PROVIDER = {
     "remote::nvidia": [
         "nvidia/llama-3.2-nv-embedqa-1b-v2",
@@ -31,6 +22,14 @@ def decode_base64_to_floats(base64_string: str) -> list[float]:
 }
 
 
+def decode_base64_to_floats(base64_string: str) -> list[float]:
+    """Helper function to decode base64 string to list of float32 values."""
+    embedding_bytes = base64.b64decode(base64_string)
+    float_count = len(embedding_bytes) // 4  # 4 bytes per float32
+    embedding_floats = struct.unpack(f"{float_count}f", embedding_bytes)
+    return list(embedding_floats)
+
+
 def provider_from_model(client_with_models, model_id):
     models = {m.identifier: m for m in client_with_models.models.list()}
     models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
@@ -50,6 +49,9 @@ def is_asymmetric_model(client_with_models, model_id):
 
 
 def get_extra_body_for_model(client_with_models, model_id, input_type="query"):
+    if not is_asymmetric_model(client_with_models, model_id):
+        return None
+
     provider = provider_from_model(client_with_models, model_id)
 
     if provider.provider_type == "remote::nvidia":
@@ -149,27 +151,13 @@ def test_openai_embeddings_single_string(compat_client, client_with_models, embe
     skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
 
     input_text = "Hello, world!"
-    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
 
-    # For asymmetric models, verify that calling without extra_body raises an error
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs_without_extra = {
-            "model": embedding_model_id,
-            "input": input_text,
-            "encoding_format": "float",
-        }
-        with pytest.raises(Exception):  # noqa: B017
-            compat_client.embeddings.create(**kwargs_without_extra)
-
-    kwargs = {
-        "model": embedding_model_id,
-        "input": input_text,
-        "encoding_format": "float",
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs["extra_body"] = extra_body
-
-    response = compat_client.embeddings.create(**kwargs)
+    response = compat_client.embeddings.create(
+        model=embedding_model_id,
+        input=input_text,
+        encoding_format="float",
+        extra_body=get_extra_body_for_model(client_with_models, embedding_model_id),
+    )
 
     assert response.object == "list"
 
@@ -188,26 +176,13 @@ def test_openai_embeddings_multiple_strings(compat_client, client_with_models, e
     skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
 
     input_texts = ["Hello, world!", "How are you today?", "This is a test."]
-    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
 
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs_without_extra = {
-            "model": embedding_model_id,
-            "input": input_texts,
-            "encoding_format": "float",
-        }
-        with pytest.raises(Exception):  # noqa: B017
-            compat_client.embeddings.create(**kwargs_without_extra)
-
-    kwargs = {
-        "model": embedding_model_id,
-        "input": input_texts,
-        "encoding_format": "float",
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs["extra_body"] = extra_body
-
-    response = compat_client.embeddings.create(**kwargs)
+    response = compat_client.embeddings.create(
+        model=embedding_model_id,
+        input=input_texts,
+        encoding_format="float",
+        extra_body=get_extra_body_for_model(client_with_models, embedding_model_id),
+    )
 
     assert response.object == "list"
 
@@ -228,26 +203,13 @@ def test_openai_embeddings_with_encoding_format_float(compat_client, client_with
     skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
 
     input_text = "Test encoding format"
-    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
 
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs_without_extra = {
-            "model": embedding_model_id,
-            "input": input_text,
-            "encoding_format": "float",
-        }
-        with pytest.raises(Exception):  # noqa: B017
-            compat_client.embeddings.create(**kwargs_without_extra)
-
-    kwargs = {
-        "model": embedding_model_id,
-        "input": input_text,
-        "encoding_format": "float",
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs["extra_body"] = extra_body
-
-    response = compat_client.embeddings.create(**kwargs)
+    response = compat_client.embeddings.create(
+        model=embedding_model_id,
+        input=input_text,
+        encoding_format="float",
+        extra_body=get_extra_body_for_model(client_with_models, embedding_model_id),
+    )
 
     assert response.object == "list"
     assert len(response.data) == 1
@@ -262,26 +224,13 @@ def test_openai_embeddings_with_dimensions(compat_client, client_with_models, em
 
     input_text = "Test dimensions parameter"
     dimensions = 16
-    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
 
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs_without_extra = {
-            "model": embedding_model_id,
-            "input": input_text,
-            "dimensions": dimensions,
-        }
-        with pytest.raises(Exception):  # noqa: B017
-            compat_client.embeddings.create(**kwargs_without_extra)
-
-    kwargs = {
-        "model": embedding_model_id,
-        "input": input_text,
-        "dimensions": dimensions,
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs["extra_body"] = extra_body
-
-    response = compat_client.embeddings.create(**kwargs)
+    response = compat_client.embeddings.create(
+        model=embedding_model_id,
+        input=input_text,
+        dimensions=dimensions,
+        extra_body=get_extra_body_for_model(client_with_models, embedding_model_id),
+    )
 
     assert response.object == "list"
     assert len(response.data) == 1
@@ -297,26 +246,13 @@ def test_openai_embeddings_with_user_parameter(compat_client, client_with_models
 
     input_text = "Test user parameter"
     user_id = "test-user-123"
-    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
 
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs_without_extra = {
-            "model": embedding_model_id,
-            "input": input_text,
-            "user": user_id,
-        }
-        with pytest.raises(Exception):  # noqa: B017
-            compat_client.embeddings.create(**kwargs_without_extra)
-
-    kwargs = {
-        "model": embedding_model_id,
-        "input": input_text,
-        "user": user_id,
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs["extra_body"] = extra_body
-
-    response = compat_client.embeddings.create(**kwargs)
+    response = compat_client.embeddings.create(
+        model=embedding_model_id,
+        input=input_text,
+        user=user_id,
+        extra_body=get_extra_body_for_model(client_with_models, embedding_model_id),
+    )
 
     assert response.object == "list"
     assert len(response.data) == 1
@@ -328,17 +264,12 @@ def test_openai_embeddings_empty_list_error(compat_client, client_with_models, e
     """Test that empty list input raises an appropriate error."""
     skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id)
 
-    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
-
-    kwargs = {
-        "model": embedding_model_id,
-        "input": [],
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs["extra_body"] = extra_body
-
     with pytest.raises(Exception):  # noqa: B017
-        compat_client.embeddings.create(**kwargs)
+        compat_client.embeddings.create(
+            model=embedding_model_id,
+            input=[],
+            extra_body=get_extra_body_for_model(client_with_models, embedding_model_id),
+        )
 
 
 def test_openai_embeddings_invalid_model_error(compat_client, client_with_models, embedding_model_id):
@@ -349,6 +280,7 @@ def test_openai_embeddings_invalid_model_error(compat_client, client_with_models
         compat_client.embeddings.create(
             model="invalid-model-id",
             input="Test text",
+            extra_body=get_extra_body_for_model(client_with_models, embedding_model_id),
         )
 
 
@@ -358,35 +290,21 @@ def test_openai_embeddings_different_inputs_different_outputs(compat_client, cli
 
     input_text1 = "This is the first text"
     input_text2 = "This is completely different content"
+
     extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
+    response1 = compat_client.embeddings.create(
+        model=embedding_model_id,
+        input=input_text1,
+        encoding_format="float",
+        extra_body=extra_body,
+    )
 
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs_without_extra = {
-            "model": embedding_model_id,
-            "input": input_text1,
-            "encoding_format": "float",
-        }
-        with pytest.raises(Exception):  # noqa: B017
-            compat_client.embeddings.create(**kwargs_without_extra)
-
-    kwargs1 = {
-        "model": embedding_model_id,
-        "input": input_text1,
-        "encoding_format": "float",
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs1["extra_body"] = extra_body
-
-    kwargs2 = {
-        "model": embedding_model_id,
-        "input": input_text2,
-        "encoding_format": "float",
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs2["extra_body"] = extra_body
-
-    response1 = compat_client.embeddings.create(**kwargs1)
-    response2 = compat_client.embeddings.create(**kwargs2)
+    response2 = compat_client.embeddings.create(
+        model=embedding_model_id,
+        input=input_text2,
+        encoding_format="float",
+        extra_body=extra_body,
+    )
 
     embedding1 = response1.data[0].embedding
     embedding2 = response2.data[0].embedding
@@ -404,28 +322,14 @@ def test_openai_embeddings_with_encoding_format_base64(compat_client, client_wit
 
     input_text = "Test base64 encoding format"
     dimensions = 12
-    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
 
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs_without_extra = {
-            "model": embedding_model_id,
-            "input": input_text,
-            "encoding_format": "base64",
-            "dimensions": dimensions,
-        }
-        with pytest.raises(Exception):  # noqa: B017
-            compat_client.embeddings.create(**kwargs_without_extra)
-
-    kwargs = {
-        "model": embedding_model_id,
-        "input": input_text,
-        "encoding_format": "base64",
-        "dimensions": dimensions,
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs["extra_body"] = extra_body
-
-    response = compat_client.embeddings.create(**kwargs)
+    response = compat_client.embeddings.create(
+        model=embedding_model_id,
+        input=input_text,
+        encoding_format="base64",
+        dimensions=dimensions,
+        extra_body=get_extra_body_for_model(client_with_models, embedding_model_id),
+    )
 
     # Validate response structure
     assert response.object == "list"
@@ -451,27 +355,13 @@ def test_openai_embeddings_base64_batch_processing(compat_client, client_with_mo
     skip_if_model_doesnt_support_encoding_format_base64(client_with_models, embedding_model_id)
 
     input_texts = ["First text for base64", "Second text for base64", "Third text for base64"]
-    extra_body = get_extra_body_for_model(client_with_models, embedding_model_id)
-
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs_without_extra = {
-            "model": embedding_model_id,
-            "input": input_texts,
-            "encoding_format": "base64",
-        }
-        with pytest.raises(Exception):  # noqa: B017
-            compat_client.embeddings.create(**kwargs_without_extra)
-
-    kwargs = {
-        "model": embedding_model_id,
-        "input": input_texts,
-        "encoding_format": "base64",
-    }
-    if is_asymmetric_model(client_with_models, embedding_model_id):
-        kwargs["extra_body"] = extra_body
-
-    response = compat_client.embeddings.create(**kwargs)
 
+    response = compat_client.embeddings.create(
+        model=embedding_model_id,
+        input=input_texts,
+        encoding_format="base64",
+        extra_body=get_extra_body_for_model(client_with_models, embedding_model_id),
+    )
     # Validate response structure
     assert response.object == "list"