From 1d4d263d577140afe8f38ce47fc5ada67752f14e Mon Sep 17 00:00:00 2001 From: Jiayi Date: Mon, 13 Oct 2025 21:39:01 -0700 Subject: [PATCH 1/2] fix: use extra_body for passing input_type params for asymmetric embedding models for NVIDIA Inference Provider --- .../remote/inference/nvidia/NVIDIA.md | 13 +- .../remote/inference/nvidia/nvidia.py | 55 ---- .../inference/test_openai_embeddings.py | 303 ++++++++++++++---- 3 files changed, 243 insertions(+), 128 deletions(-) diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 625be6088e..096ff28ac7 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -139,16 +139,13 @@ print(f"Structured Response: {structured_response.choices[0].message.content}") The following example shows how to create embeddings for an NVIDIA NIM. -> [!NOTE] -> NVIDIA asymmetric embedding models (e.g., `nvidia/llama-3.2-nv-embedqa-1b-v2`) require an `input_type` parameter not present in the standard OpenAI embeddings API. The NVIDIA Inference Adapter automatically sets `input_type="query"` when using the OpenAI-compatible embeddings endpoint for NVIDIA. For passage embeddings, use the `embeddings` API with `task_type="document"`. - ```python -response = client.inference.embeddings( - model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", - contents=["What is the capital of France?"], - task_type="query", +response = client.embeddings.create( + model="nvidia/llama-3.2-nv-embedqa-1b-v2", + input=["What is the capital of France?"], + extra_body={"input_type": "query"}, ) -print(f"Embeddings: {response.embeddings}") +print(f"Embeddings: {response.data}") ``` ### Vision Language Models Example diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 37864b0408..eab665d631 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -5,14 +5,6 @@ # the root directory of this source tree. -from openai import NOT_GIVEN - -from llama_stack.apis.inference import ( - OpenAIEmbeddingData, - OpenAIEmbeddingsRequestWithExtraBody, - OpenAIEmbeddingsResponse, - OpenAIEmbeddingUsage, -) from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -76,50 +68,3 @@ def get_base_url(self) -> str: :return: The NVIDIA API base URL """ return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url - - async def openai_embeddings( - self, - params: OpenAIEmbeddingsRequestWithExtraBody, - ) -> OpenAIEmbeddingsResponse: - """ - OpenAI-compatible embeddings for NVIDIA NIM. - - Note: NVIDIA NIM asymmetric embedding models require an "input_type" field not present in the standard OpenAI embeddings API. - We default this to "query" to ensure requests succeed when using the - OpenAI-compatible endpoint. For passage embeddings, use the embeddings API with - `task_type='document'`. - """ - extra_body: dict[str, object] = {"input_type": "query"} - logger.warning( - "NVIDIA OpenAI-compatible embeddings: defaulting to input_type='query'. " - "For passage embeddings, use the embeddings API with task_type='document'." - ) - - response = await self.client.embeddings.create( - model=await self._get_provider_model_id(params.model), - input=params.input, - encoding_format=params.encoding_format if params.encoding_format is not None else NOT_GIVEN, - dimensions=params.dimensions if params.dimensions is not None else NOT_GIVEN, - user=params.user if params.user is not None else NOT_GIVEN, - extra_body=extra_body, - ) - - data = [] - for i, embedding_data in enumerate(response.data): - data.append( - OpenAIEmbeddingData( - embedding=embedding_data.embedding, - index=i, - ) - ) - - usage = OpenAIEmbeddingUsage( - prompt_tokens=response.usage.prompt_tokens, - total_tokens=response.usage.total_tokens, - ) - - return OpenAIEmbeddingsResponse( - data=data, - model=response.model, - usage=usage, - ) diff --git a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py index fc2f66b9c1..afe9f31f12 100644 --- a/tests/integration/inference/test_openai_embeddings.py +++ b/tests/integration/inference/test_openai_embeddings.py @@ -21,6 +21,16 @@ def decode_base64_to_floats(base64_string: str) -> list[float]: return list(embedding_floats) +ASYMMETRIC_EMBEDDING_MODELS_BY_PROVIDER = { + "remote::nvidia": [ + "nvidia/llama-3.2-nv-embedqa-1b-v2", + "nvidia/nv-embedqa-e5-v5", + "nvidia/nv-embedqa-mistral-7b-v2", + "snowflake/arctic-embed-l", + ], +} + + def provider_from_model(client_with_models, model_id): models = {m.identifier: m for m in client_with_models.models.list()} models.update({m.provider_resource_id: m for m in client_with_models.models.list()}) @@ -29,6 +39,25 @@ def provider_from_model(client_with_models, model_id): return providers[provider_id] +def is_asymmetric_model(client_with_models, model_id): + provider = provider_from_model(client_with_models, model_id) + provider_type = provider.provider_type + + if provider_type not in ASYMMETRIC_EMBEDDING_MODELS_BY_PROVIDER: + return False + + return model_id in ASYMMETRIC_EMBEDDING_MODELS_BY_PROVIDER[provider_type] + + +def get_extra_body_for_model(client_with_models, model_id, input_type="query"): + provider = provider_from_model(client_with_models, model_id) + + if provider.provider_type == "remote::nvidia": + return {"input_type": input_type} + + return None + + def skip_if_model_doesnt_support_user_param(client, model_id): provider = provider_from_model(client, model_id) if provider.provider_type in ( @@ -40,17 +69,29 @@ def skip_if_model_doesnt_support_user_param(client, model_id): def skip_if_model_doesnt_support_encoding_format_base64(client, model_id): provider = provider_from_model(client, model_id) - if provider.provider_type in ( + + should_skip = provider.provider_type in ( "remote::databricks", # param silently ignored, always returns floats "remote::fireworks", # param silently ignored, always returns list of floats "remote::ollama", # param silently ignored, always returns list of floats - ): + ) or ( + provider.provider_type == "remote::nvidia" + and model_id + in [ + "nvidia/nv-embedqa-e5-v5", + "nvidia/nv-embedqa-mistral-7b-v2", + "snowflake/arctic-embed-l", + ] + ) + + if should_skip: pytest.skip(f"Model {model_id} hosted by {provider.provider_type} does not support encoding_format='base64'.") def skip_if_model_doesnt_support_variable_dimensions(client_with_models, model_id): provider = provider_from_model(client_with_models, model_id) - if ( + + should_skip = ( provider.provider_type in ( "remote::together", # returns 400 @@ -59,11 +100,19 @@ def skip_if_model_doesnt_support_variable_dimensions(client_with_models, model_i "remote::databricks", "remote::watsonx", # openai.BadRequestError: Error code: 400 - {'detail': "litellm.UnsupportedParamsError: watsonx does not support parameters: {'dimensions': 384} ) - ): - pytest.skip( - f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions." + or (provider.provider_type == "remote::openai" and "text-embedding-3" not in model_id) + or ( + provider.provider_type == "remote::nvidia" + and model_id + in [ + "nvidia/nv-embedqa-e5-v5", + "nvidia/nv-embedqa-mistral-7b-v2", + "snowflake/arctic-embed-l", + ] ) - if provider.provider_type == "remote::openai" and "text-embedding-3" not in model_id: + ) + + if should_skip: pytest.skip( f"Model {model_id} hosted by {provider.provider_type} does not support variable output embedding dimensions." ) @@ -100,12 +149,27 @@ def test_openai_embeddings_single_string(compat_client, client_with_models, embe skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id) input_text = "Hello, world!" - - response = compat_client.embeddings.create( - model=embedding_model_id, - input=input_text, - encoding_format="float", - ) + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + + # For asymmetric models, verify that calling without extra_body raises an error + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs_without_extra = { + "model": embedding_model_id, + "input": input_text, + "encoding_format": "float", + } + with pytest.raises(Exception): # noqa: B017 + compat_client.embeddings.create(**kwargs_without_extra) + + kwargs = { + "model": embedding_model_id, + "input": input_text, + "encoding_format": "float", + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs["extra_body"] = extra_body + + response = compat_client.embeddings.create(**kwargs) assert response.object == "list" @@ -124,12 +188,26 @@ def test_openai_embeddings_multiple_strings(compat_client, client_with_models, e skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id) input_texts = ["Hello, world!", "How are you today?", "This is a test."] - - response = compat_client.embeddings.create( - model=embedding_model_id, - input=input_texts, - encoding_format="float", - ) + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs_without_extra = { + "model": embedding_model_id, + "input": input_texts, + "encoding_format": "float", + } + with pytest.raises(Exception): # noqa: B017 + compat_client.embeddings.create(**kwargs_without_extra) + + kwargs = { + "model": embedding_model_id, + "input": input_texts, + "encoding_format": "float", + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs["extra_body"] = extra_body + + response = compat_client.embeddings.create(**kwargs) assert response.object == "list" @@ -150,12 +228,26 @@ def test_openai_embeddings_with_encoding_format_float(compat_client, client_with skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id) input_text = "Test encoding format" - - response = compat_client.embeddings.create( - model=embedding_model_id, - input=input_text, - encoding_format="float", - ) + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs_without_extra = { + "model": embedding_model_id, + "input": input_text, + "encoding_format": "float", + } + with pytest.raises(Exception): # noqa: B017 + compat_client.embeddings.create(**kwargs_without_extra) + + kwargs = { + "model": embedding_model_id, + "input": input_text, + "encoding_format": "float", + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs["extra_body"] = extra_body + + response = compat_client.embeddings.create(**kwargs) assert response.object == "list" assert len(response.data) == 1 @@ -170,12 +262,26 @@ def test_openai_embeddings_with_dimensions(compat_client, client_with_models, em input_text = "Test dimensions parameter" dimensions = 16 - - response = compat_client.embeddings.create( - model=embedding_model_id, - input=input_text, - dimensions=dimensions, - ) + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs_without_extra = { + "model": embedding_model_id, + "input": input_text, + "dimensions": dimensions, + } + with pytest.raises(Exception): # noqa: B017 + compat_client.embeddings.create(**kwargs_without_extra) + + kwargs = { + "model": embedding_model_id, + "input": input_text, + "dimensions": dimensions, + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs["extra_body"] = extra_body + + response = compat_client.embeddings.create(**kwargs) assert response.object == "list" assert len(response.data) == 1 @@ -191,12 +297,26 @@ def test_openai_embeddings_with_user_parameter(compat_client, client_with_models input_text = "Test user parameter" user_id = "test-user-123" - - response = compat_client.embeddings.create( - model=embedding_model_id, - input=input_text, - user=user_id, - ) + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs_without_extra = { + "model": embedding_model_id, + "input": input_text, + "user": user_id, + } + with pytest.raises(Exception): # noqa: B017 + compat_client.embeddings.create(**kwargs_without_extra) + + kwargs = { + "model": embedding_model_id, + "input": input_text, + "user": user_id, + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs["extra_body"] = extra_body + + response = compat_client.embeddings.create(**kwargs) assert response.object == "list" assert len(response.data) == 1 @@ -208,11 +328,17 @@ def test_openai_embeddings_empty_list_error(compat_client, client_with_models, e """Test that empty list input raises an appropriate error.""" skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id) + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + + kwargs = { + "model": embedding_model_id, + "input": [], + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs["extra_body"] = extra_body + with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create( - model=embedding_model_id, - input=[], - ) + compat_client.embeddings.create(**kwargs) def test_openai_embeddings_invalid_model_error(compat_client, client_with_models, embedding_model_id): @@ -232,18 +358,35 @@ def test_openai_embeddings_different_inputs_different_outputs(compat_client, cli input_text1 = "This is the first text" input_text2 = "This is completely different content" - - response1 = compat_client.embeddings.create( - model=embedding_model_id, - input=input_text1, - encoding_format="float", - ) - - response2 = compat_client.embeddings.create( - model=embedding_model_id, - input=input_text2, - encoding_format="float", - ) + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs_without_extra = { + "model": embedding_model_id, + "input": input_text1, + "encoding_format": "float", + } + with pytest.raises(Exception): # noqa: B017 + compat_client.embeddings.create(**kwargs_without_extra) + + kwargs1 = { + "model": embedding_model_id, + "input": input_text1, + "encoding_format": "float", + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs1["extra_body"] = extra_body + + kwargs2 = { + "model": embedding_model_id, + "input": input_text2, + "encoding_format": "float", + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs2["extra_body"] = extra_body + + response1 = compat_client.embeddings.create(**kwargs1) + response2 = compat_client.embeddings.create(**kwargs2) embedding1 = response1.data[0].embedding embedding2 = response2.data[0].embedding @@ -261,13 +404,28 @@ def test_openai_embeddings_with_encoding_format_base64(compat_client, client_wit input_text = "Test base64 encoding format" dimensions = 12 - - response = compat_client.embeddings.create( - model=embedding_model_id, - input=input_text, - encoding_format="base64", - dimensions=dimensions, - ) + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs_without_extra = { + "model": embedding_model_id, + "input": input_text, + "encoding_format": "base64", + "dimensions": dimensions, + } + with pytest.raises(Exception): # noqa: B017 + compat_client.embeddings.create(**kwargs_without_extra) + + kwargs = { + "model": embedding_model_id, + "input": input_text, + "encoding_format": "base64", + "dimensions": dimensions, + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs["extra_body"] = extra_body + + response = compat_client.embeddings.create(**kwargs) # Validate response structure assert response.object == "list" @@ -293,12 +451,27 @@ def test_openai_embeddings_base64_batch_processing(compat_client, client_with_mo skip_if_model_doesnt_support_encoding_format_base64(client_with_models, embedding_model_id) input_texts = ["First text for base64", "Second text for base64", "Third text for base64"] + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs_without_extra = { + "model": embedding_model_id, + "input": input_texts, + "encoding_format": "base64", + } + with pytest.raises(Exception): # noqa: B017 + compat_client.embeddings.create(**kwargs_without_extra) + + kwargs = { + "model": embedding_model_id, + "input": input_texts, + "encoding_format": "base64", + } + if is_asymmetric_model(client_with_models, embedding_model_id): + kwargs["extra_body"] = extra_body + + response = compat_client.embeddings.create(**kwargs) - response = compat_client.embeddings.create( - model=embedding_model_id, - input=input_texts, - encoding_format="base64", - ) # Validate response structure assert response.object == "list" From 33190c13f81bd26c5b99cd2f889751c0af2bdade Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 14 Oct 2025 12:57:17 -0700 Subject: [PATCH 2/2] Refactor test_openai_embeddings --- .../inference/test_openai_embeddings.py | 256 +++++------------- 1 file changed, 73 insertions(+), 183 deletions(-) diff --git a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py index afe9f31f12..00de56f3a0 100644 --- a/tests/integration/inference/test_openai_embeddings.py +++ b/tests/integration/inference/test_openai_embeddings.py @@ -12,15 +12,6 @@ from llama_stack.core.library_client import LlamaStackAsLibraryClient - -def decode_base64_to_floats(base64_string: str) -> list[float]: - """Helper function to decode base64 string to list of float32 values.""" - embedding_bytes = base64.b64decode(base64_string) - float_count = len(embedding_bytes) // 4 # 4 bytes per float32 - embedding_floats = struct.unpack(f"{float_count}f", embedding_bytes) - return list(embedding_floats) - - ASYMMETRIC_EMBEDDING_MODELS_BY_PROVIDER = { "remote::nvidia": [ "nvidia/llama-3.2-nv-embedqa-1b-v2", @@ -31,6 +22,14 @@ def decode_base64_to_floats(base64_string: str) -> list[float]: } +def decode_base64_to_floats(base64_string: str) -> list[float]: + """Helper function to decode base64 string to list of float32 values.""" + embedding_bytes = base64.b64decode(base64_string) + float_count = len(embedding_bytes) // 4 # 4 bytes per float32 + embedding_floats = struct.unpack(f"{float_count}f", embedding_bytes) + return list(embedding_floats) + + def provider_from_model(client_with_models, model_id): models = {m.identifier: m for m in client_with_models.models.list()} models.update({m.provider_resource_id: m for m in client_with_models.models.list()}) @@ -50,6 +49,9 @@ def is_asymmetric_model(client_with_models, model_id): def get_extra_body_for_model(client_with_models, model_id, input_type="query"): + if not is_asymmetric_model(client_with_models, model_id): + return None + provider = provider_from_model(client_with_models, model_id) if provider.provider_type == "remote::nvidia": @@ -149,27 +151,13 @@ def test_openai_embeddings_single_string(compat_client, client_with_models, embe skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id) input_text = "Hello, world!" - extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) - # For asymmetric models, verify that calling without extra_body raises an error - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs_without_extra = { - "model": embedding_model_id, - "input": input_text, - "encoding_format": "float", - } - with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create(**kwargs_without_extra) - - kwargs = { - "model": embedding_model_id, - "input": input_text, - "encoding_format": "float", - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs["extra_body"] = extra_body - - response = compat_client.embeddings.create(**kwargs) + response = compat_client.embeddings.create( + model=embedding_model_id, + input=input_text, + encoding_format="float", + extra_body=get_extra_body_for_model(client_with_models, embedding_model_id), + ) assert response.object == "list" @@ -188,26 +176,13 @@ def test_openai_embeddings_multiple_strings(compat_client, client_with_models, e skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id) input_texts = ["Hello, world!", "How are you today?", "This is a test."] - extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs_without_extra = { - "model": embedding_model_id, - "input": input_texts, - "encoding_format": "float", - } - with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create(**kwargs_without_extra) - - kwargs = { - "model": embedding_model_id, - "input": input_texts, - "encoding_format": "float", - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs["extra_body"] = extra_body - - response = compat_client.embeddings.create(**kwargs) + response = compat_client.embeddings.create( + model=embedding_model_id, + input=input_texts, + encoding_format="float", + extra_body=get_extra_body_for_model(client_with_models, embedding_model_id), + ) assert response.object == "list" @@ -228,26 +203,13 @@ def test_openai_embeddings_with_encoding_format_float(compat_client, client_with skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id) input_text = "Test encoding format" - extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs_without_extra = { - "model": embedding_model_id, - "input": input_text, - "encoding_format": "float", - } - with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create(**kwargs_without_extra) - - kwargs = { - "model": embedding_model_id, - "input": input_text, - "encoding_format": "float", - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs["extra_body"] = extra_body - - response = compat_client.embeddings.create(**kwargs) + response = compat_client.embeddings.create( + model=embedding_model_id, + input=input_text, + encoding_format="float", + extra_body=get_extra_body_for_model(client_with_models, embedding_model_id), + ) assert response.object == "list" assert len(response.data) == 1 @@ -262,26 +224,13 @@ def test_openai_embeddings_with_dimensions(compat_client, client_with_models, em input_text = "Test dimensions parameter" dimensions = 16 - extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs_without_extra = { - "model": embedding_model_id, - "input": input_text, - "dimensions": dimensions, - } - with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create(**kwargs_without_extra) - - kwargs = { - "model": embedding_model_id, - "input": input_text, - "dimensions": dimensions, - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs["extra_body"] = extra_body - - response = compat_client.embeddings.create(**kwargs) + response = compat_client.embeddings.create( + model=embedding_model_id, + input=input_text, + dimensions=dimensions, + extra_body=get_extra_body_for_model(client_with_models, embedding_model_id), + ) assert response.object == "list" assert len(response.data) == 1 @@ -297,26 +246,13 @@ def test_openai_embeddings_with_user_parameter(compat_client, client_with_models input_text = "Test user parameter" user_id = "test-user-123" - extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs_without_extra = { - "model": embedding_model_id, - "input": input_text, - "user": user_id, - } - with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create(**kwargs_without_extra) - - kwargs = { - "model": embedding_model_id, - "input": input_text, - "user": user_id, - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs["extra_body"] = extra_body - - response = compat_client.embeddings.create(**kwargs) + response = compat_client.embeddings.create( + model=embedding_model_id, + input=input_text, + user=user_id, + extra_body=get_extra_body_for_model(client_with_models, embedding_model_id), + ) assert response.object == "list" assert len(response.data) == 1 @@ -328,17 +264,12 @@ def test_openai_embeddings_empty_list_error(compat_client, client_with_models, e """Test that empty list input raises an appropriate error.""" skip_if_model_doesnt_support_openai_embeddings(client_with_models, embedding_model_id) - extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) - - kwargs = { - "model": embedding_model_id, - "input": [], - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs["extra_body"] = extra_body - with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create(**kwargs) + compat_client.embeddings.create( + model=embedding_model_id, + input=[], + extra_body=get_extra_body_for_model(client_with_models, embedding_model_id), + ) def test_openai_embeddings_invalid_model_error(compat_client, client_with_models, embedding_model_id): @@ -349,6 +280,7 @@ def test_openai_embeddings_invalid_model_error(compat_client, client_with_models compat_client.embeddings.create( model="invalid-model-id", input="Test text", + extra_body=get_extra_body_for_model(client_with_models, embedding_model_id), ) @@ -358,35 +290,21 @@ def test_openai_embeddings_different_inputs_different_outputs(compat_client, cli input_text1 = "This is the first text" input_text2 = "This is completely different content" + extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) + response1 = compat_client.embeddings.create( + model=embedding_model_id, + input=input_text1, + encoding_format="float", + extra_body=extra_body, + ) - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs_without_extra = { - "model": embedding_model_id, - "input": input_text1, - "encoding_format": "float", - } - with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create(**kwargs_without_extra) - - kwargs1 = { - "model": embedding_model_id, - "input": input_text1, - "encoding_format": "float", - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs1["extra_body"] = extra_body - - kwargs2 = { - "model": embedding_model_id, - "input": input_text2, - "encoding_format": "float", - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs2["extra_body"] = extra_body - - response1 = compat_client.embeddings.create(**kwargs1) - response2 = compat_client.embeddings.create(**kwargs2) + response2 = compat_client.embeddings.create( + model=embedding_model_id, + input=input_text2, + encoding_format="float", + extra_body=extra_body, + ) embedding1 = response1.data[0].embedding embedding2 = response2.data[0].embedding @@ -404,28 +322,14 @@ def test_openai_embeddings_with_encoding_format_base64(compat_client, client_wit input_text = "Test base64 encoding format" dimensions = 12 - extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs_without_extra = { - "model": embedding_model_id, - "input": input_text, - "encoding_format": "base64", - "dimensions": dimensions, - } - with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create(**kwargs_without_extra) - - kwargs = { - "model": embedding_model_id, - "input": input_text, - "encoding_format": "base64", - "dimensions": dimensions, - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs["extra_body"] = extra_body - - response = compat_client.embeddings.create(**kwargs) + response = compat_client.embeddings.create( + model=embedding_model_id, + input=input_text, + encoding_format="base64", + dimensions=dimensions, + extra_body=get_extra_body_for_model(client_with_models, embedding_model_id), + ) # Validate response structure assert response.object == "list" @@ -451,27 +355,13 @@ def test_openai_embeddings_base64_batch_processing(compat_client, client_with_mo skip_if_model_doesnt_support_encoding_format_base64(client_with_models, embedding_model_id) input_texts = ["First text for base64", "Second text for base64", "Third text for base64"] - extra_body = get_extra_body_for_model(client_with_models, embedding_model_id) - - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs_without_extra = { - "model": embedding_model_id, - "input": input_texts, - "encoding_format": "base64", - } - with pytest.raises(Exception): # noqa: B017 - compat_client.embeddings.create(**kwargs_without_extra) - - kwargs = { - "model": embedding_model_id, - "input": input_texts, - "encoding_format": "base64", - } - if is_asymmetric_model(client_with_models, embedding_model_id): - kwargs["extra_body"] = extra_body - - response = compat_client.embeddings.create(**kwargs) + response = compat_client.embeddings.create( + model=embedding_model_id, + input=input_texts, + encoding_format="base64", + extra_body=get_extra_body_for_model(client_with_models, embedding_model_id), + ) # Validate response structure assert response.object == "list"