From 8d07ffbc4d772de087c4136ebe69315b1b64bc5f Mon Sep 17 00:00:00 2001 From: vadim Date: Thu, 13 Nov 2025 15:19:22 +0300 Subject: [PATCH 01/27] update infinity-emb version to 0.0.77 and add optimum dependency --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9812a81..d82080a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ runpod~=1.7.0 -infinity-emb[all]==0.0.76 +infinity-emb[all]==0.0.77 +optimum==1.24.0 einops # deployment of custom code with nomic git+https://github.com/pytorch-labs/float8_experimental.git From 9acf3f0d24c41ced6c88b7472aeecb5d48011b6d Mon Sep 17 00:00:00 2001 From: vadim Date: Thu, 13 Nov 2025 18:00:27 +0300 Subject: [PATCH 02/27] added clip-based model --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 2c81997..0bd5734 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,7 +13,7 @@ services: count: all capabilities: [gpu] environment: - MODEL_NAMES: "BAAI/bge-small-en-v1.5" + MODEL_NAMES: "BAAI/bge-small-en-v1.5;patrickjohncyh/fashion-clip" NVIDIA_VISIBLE_DEVICES: "all" volumes: - ./data/runpod-volume:/runpod-volume From 2c130ca6d61c1057b69ab1efecfffc6293b560cf Mon Sep 17 00:00:00 2001 From: vadim Date: Thu, 13 Nov 2025 18:00:40 +0300 Subject: [PATCH 03/27] multimodal tests --- .runpod/tests.json | 93 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 4 deletions(-) diff --git a/.runpod/tests.json b/.runpod/tests.json index 02b8a4f..ed0d318 100644 --- a/.runpod/tests.json +++ b/.runpod/tests.json @@ -1,12 +1,97 @@ { "tests": [ { - "name": "basic_test", + "name": "multimodal_text", "input": { - "model": "BAAI/bge-small-en-v1.5", - "input": "Hello, world!" 
+ "openai_route": "/v1/embeddings", + "openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": "A beautiful red dress" + } + }, + "expected_output": { + "status": "COMPLETED" }, "timeout": 10000 + }, + { + "name": "multimodal_image_url", + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": "https://images.unsplash.com/photo-1515372039744-b8f02a3ae446?w=400" + } + }, + "expected_output": { + "status": "COMPLETED" + }, + "timeout": 15000 + }, + { + "name": "multimodal_multiple_images", + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": [ + "https://images.unsplash.com/photo-1515372039744-b8f02a3ae446?w=400", + "https://images.unsplash.com/photo-1551028719-00167b16eac5?w=400" + ] + } + }, + "expected_output": { + "status": "COMPLETED" + }, + "timeout": 20000 + }, + { + "name": "multimodal_multiple_texts", + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": [ + "A red dress", + "A blue shirt", + "Black shoes" + ] + } + }, + "expected_output": { + "status": "COMPLETED" + }, + "timeout": 15000 + }, + { + "name": "multimodal_mixed_text_and_images", + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": [ + "A red summer dress", + "https://images.unsplash.com/photo-1515372039744-b8f02a3ae446?w=400", + "A blue winter coat", + "https://images.unsplash.com/photo-1551028719-00167b16eac5?w=400" + ] + } + }, + "expected_output": { + "status": "COMPLETED" + }, + "timeout": 25000 + }, + { + "name": "get_models_list", + "input": { + "openai_route": "/v1/models", + "openai_input": {} + }, + "expected_output": { + "status": "COMPLETED" + }, + "timeout": 5000 } ], "config": { @@ -16,7 +101,7 @@ "env": [ { "key": "MODEL_NAMES", - "value": "BAAI/bge-small-en-v1.5" + "value": "patrickjohncyh/fashion-clip" } ] } From 17fee84a900908ebe329aeda36dcfcece7f4d24b Mon Sep 17 00:00:00 2001 From: vadim Date: Thu, 13 Nov 2025 18:10:04 +0300 Subject: [PATCH 04/27] added multimodal routing (images and/or text) --- src/multimodal_utils.py | 63 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 src/multimodal_utils.py diff --git a/src/multimodal_utils.py b/src/multimodal_utils.py new file mode 100644 index 0000000..fe55477 --- /dev/null +++ b/src/multimodal_utils.py @@ -0,0 +1,63 @@ +import base64 +import io +import re +from PIL import Image + + +def _is_url(text: str) -> bool: + """Check if string is a URL""" + return text.startswith('http://') or text.startswith('https://') + + +def _is_base64_image(data: str) -> tuple[bool, Image.Image | None]: + """ + Check if string is a base64-encoded image and decode it. + Supports both data URI format (data:image/...) and raw base64. + Returns (is_image, PIL_Image or None) + """ + try: + # Handle data URI format: data:image/png;base64,iVBORw0KG... 
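+        # Only data URIs declaring an image/* media type pass the regex below;
+        # other data: URIs are rejected. Any other string falls through to the
+        # raw-base64 attempt, where b64decode/Image.open raise for non-image
+        # payloads and the except clause turns that into (False, None).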
+ if data.startswith('data:'): + match = re.match(r'data:image/[^;]+;base64,(.+)', data) + if match: + base64_data = match.group(1) + else: + return False, None + else: + # Try raw base64 + base64_data = data + + img_bytes = base64.b64decode(base64_data) + + img = Image.open(io.BytesIO(img_bytes)) + img.load() # Force load to validate it's a real image + return True, img + except Exception: + + return False, None + + +async def parse_input_item(item: str | bytes | Image.Image) -> tuple[str, str | Image.Image | bytes]: + """ + Parse a single input item and determine if it's text or image. + Returns: (type, processed_data) + where type is 'text' or 'image' + and processed_data is either the original text or PIL.Image/URL/bytes + + Note: infinity_emb handles URL downloading internally, so we just pass URLs through. + """ + # Check if it's an image + if isinstance(item, (Image.Image, bytes)): + return 'image', item + + if isinstance(item, str): + # URL images + if _is_url(item): + return 'image', item + + # Base64 images + is_base64_img, img = _is_base64_image(item) + if is_base64_img and img is not None: + return 'image', img + + return 'text', item if isinstance(item, str) else str(item) From 3b6a67e0c4a54602bedc47ad904e71ccdd6fc00e Mon Sep 17 00:00:00 2001 From: vadim Date: Thu, 13 Nov 2025 18:10:43 +0300 Subject: [PATCH 05/27] support mixed text and image inputs, improve error handling and logging --- src/embedding_service.py | 109 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 101 insertions(+), 8 deletions(-) diff --git a/src/embedding_service.py b/src/embedding_service.py index a5afc44..7f17e36 100644 --- a/src/embedding_service.py +++ b/src/embedding_service.py @@ -1,13 +1,20 @@ -from config import EmbeddingServiceConfig +import asyncio +import logging + from infinity_emb.engine import AsyncEngineArray, EngineArgs +from infinity_emb.primitives import ModelNotDeployedError +from PIL import Image + +from config import EmbeddingServiceConfig +from src.multimodal_utils import parse_input_item from utils import ( - OpenAIModelInfo, ModelInfo, + OpenAIModelInfo, list_embeddings_to_response, to_rerank_response, ) -import asyncio +logger = logging.getLogger(__name__) class EmbeddingService: @@ -56,24 +63,110 @@ def list_models(self) -> list[str]: async def route_openai_get_embeddings( self, - embedding_input: str | list[str], + embedding_input: str | list[str] | list[str | bytes | Image.Image], model_name: str, return_as_list: bool = False, ): - """returns embeddings for the input text""" + """ + Returns embeddings for the input text and/or images. + Supports mixed text and image inputs while preserving order. + """ if not self.is_running: await self.start() + + available_models = self.list_models() + if model_name not in available_models: + logger.error( + f"Requested model '{model_name}' not found. " + f"Available models: {available_models}" + ) + raise ValueError( + f"Model '{model_name}' is not available. 
" + f"Available models: {', '.join(available_models)}" + ) + if not isinstance(embedding_input, list): embedding_input = [embedding_input] - embeddings, usage = await self.engine_array[model_name].embed(embedding_input) + parsed_items = await asyncio.gather( + *[parse_input_item(item) for item in embedding_input] + ) + + # Separate text and image items while tracking their original indices + text_items = [] + text_indices = [] + image_items = [] + image_indices = [] + + for idx, (item_type, processed_data) in enumerate(parsed_items): + if item_type == "text": + text_items.append(processed_data) + text_indices.append(idx) + else: # item_type == 'image' + image_items.append(processed_data) + image_indices.append(idx) + + logger.info( + f"Processing embeddings for model '{model_name}': " + f"{len(text_items)} text items, {len(image_items)} image items " + f"(total: {len(embedding_input)} items)" + ) + + text_embeddings = [] + text_usage = 0 + image_embeddings = [] + image_usage = 0 + + if text_items: + logger.debug(f"Calling .embed() with {len(text_items)} text items") + text_embeddings, text_usage = await self.engine_array[model_name].embed( + text_items + ) + logger.debug(f"Successfully got {len(text_embeddings)} text embeddings") + + if image_items: + logger.debug(f"Calling .image_embed() with {len(image_items)} image items") + try: + image_embeddings, image_usage = await self.engine_array[ + model_name + ].image_embed(images=image_items) + logger.debug( + f"Successfully got {len(image_embeddings)} image embeddings" + ) + except ModelNotDeployedError as e: + error_msg = ( + f"Model '{model_name}' does not support image embeddings. " + f"Please use a multimodal model (e.g., 'jinaai/jina-clip-v1') " + f"or provide text-only input. Found {len(image_items)} image items in the request." + ) + logger.error(f"{error_msg} Original error: {e}") + raise ValueError(error_msg) from e + + # Merge embeddings back in original order + total_items = len(embedding_input) + ordered_embeddings: list = [None] * total_items + + for idx, embedding in zip(text_indices, text_embeddings): + ordered_embeddings[idx] = embedding + + for idx, embedding in zip(image_indices, image_embeddings): + ordered_embeddings[idx] = embedding + + total_usage = text_usage + image_usage + if return_as_list: return [ - list_embeddings_to_response(embeddings, model=model_name, usage=usage) + list_embeddings_to_response( + ordered_embeddings, + model=model_name, + usage=total_usage, # type: ignore[arg-type] + ) ] else: return list_embeddings_to_response( - embeddings, model=model_name, usage=usage + ordered_embeddings, + model=model_name, + usage=total_usage, # type: ignore[arg-type] ) async def infinity_rerank( From ed7b1f85391a4420d178d079f41845f52df71227 Mon Sep 17 00:00:00 2001 From: vadim Date: Thu, 13 Nov 2025 18:11:02 +0300 Subject: [PATCH 06/27] update README to reflect multimodal support for text and image inputs --- README.md | 177 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 151 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index b22672b..c799f69 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,9 @@ --- -High-throughput, OpenAI-compatible text embedding & reranker powered by [Infinity](https://github.com/michaelfeil/infinity) +High-throughput, OpenAI-compatible **text & image embedding** & reranker powered by [Infinity](https://github.com/michaelfeil/infinity) + +**✨ New: Multimodal Support!** Now supports text, images (URLs & base64), and mixed inputs. 
--- @@ -11,14 +13,21 @@ High-throughput, OpenAI-compatible text embedding & reranker powered by [Infinit --- 1. [Quickstart](#quickstart) -2. [Endpoint Configuration](#endpoint-configuration) -3. [API Specification](#api-specification) +2. [Multimodal Features](#multimodal-features) +3. [Endpoint Configuration](#endpoint-configuration) +4. [API Specification](#api-specification) 1. [List Models](#list-models) 2. [Create Embeddings](#create-embeddings) 3. [Rerank Documents](#rerank-documents) -4. [Usage](#usage) -5. [Further Documentation](#further-documentation) -6. [Acknowledgements](#acknowledgements) +5. [Usage](#usage) + 1. [List Models](#list-models-1) + 2. [Text Embeddings](#text-embeddings) + 3. [Image Embeddings](#image-embeddings) + 4. [Mixed Text & Image Inputs](#mixed-text--image-inputs) + 5. [Reranking](#reranking) +6. [Testing](#testing) +7. [Further Documentation](#further-documentation) +8. [Acknowledgements](#acknowledgements) --- @@ -31,18 +40,45 @@ High-throughput, OpenAI-compatible text embedding & reranker powered by [Infinit --- +## Multimodal Features + +### Supported Input Types + +- ✅ **Text** – traditional text embeddings +- ✅ **Image URLs** – `http://` or `https://` links to images (`.jpg`, `.png`, `.gif`, etc.) +- ✅ **Base64 Images** – data URI format (`data:image/png;base64,...`) +- ✅ **Mixed Inputs** – combine text and images in a single request (order preserved) + +### Automatic Type Detection + +The worker automatically detects whether your input is text or an image: +- URLs ending with image extensions → processed as images +- Data URI with `data:image/...;base64` → decoded and processed as images +- Everything else → processed as text + +### Multimodal Models + +To use image embeddings, deploy a multimodal model such as: +- `patrickjohncyh/fashion-clip` – Fashion-focused CLIP model +- `jinaai/jina-clip-v1` – General-purpose multimodal embeddings +- Any other CLIP-based model with `image_embed` support + +> **Note:** Text-only models (like `BAAI/bge-small-en-v1.5`) will reject image inputs with a clear error message. + +--- + ## Endpoint Configuration All behaviour is controlled through environment variables: -| Variable | Required | Default | Description | -| ------------------------ | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | -| `MODEL_NAMES` | **Yes** | — | One or more Hugging-Face model IDs. Separate multiple IDs with a semicolon.
Example: `BAAI/bge-small-en-v1.5` | -| `BATCH_SIZES` | No | `32` | Per-model batch size; semicolon-separated list matching `MODEL_NAMES`. | -| `BACKEND` | No | `torch` | Inference engine for _all_ models: `torch`, `optimum`, or `ctranslate2`. | -| `DTYPES` | No | `auto` | Precision per model (`auto`, `fp16`, `fp8`). Semicolon-separated, must match `MODEL_NAMES`. | -| `INFINITY_QUEUE_SIZE` | No | `48000` | Max items queueable inside the Infinity engine. | -| `RUNPOD_MAX_CONCURRENCY` | No | `300` | Max concurrent requests the RunPod wrapper will accept. | +| Variable | Required | Default | Description | +| ------------------------ | -------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | +| `MODEL_NAMES` | **Yes** | — | One or more Hugging-Face model IDs. Separate multiple IDs with a semicolon.
Example: `BAAI/bge-small-en-v1.5;patrickjohncyh/fashion-clip` | +| `BATCH_SIZES` | No | `32` | Per-model batch size; semicolon-separated list matching `MODEL_NAMES`.
Example: `32;16` | +| `BACKEND` | No | `torch` | Inference engine for _all_ models: `torch`, `optimum`, or `ctranslate2`. | +| `DTYPES` | No | `auto` | Precision per model (`auto`, `fp16`, `fp8`). Semicolon-separated, must match `MODEL_NAMES`.
Example: `auto;auto` | +| `INFINITY_QUEUE_SIZE` | No | `48000` | Max items queueable inside the Infinity engine. | +| `RUNPOD_MAX_CONCURRENCY` | No | `300` | Max concurrent requests the RunPod wrapper will accept. | --- @@ -80,10 +116,10 @@ Except for transport (path + wrapper object) the JSON you send/receive is identi #### Request Fields (shared) -| Field | Type | Required | Description | -| ------- | ------------------- | -------- | ------------------------------------------------- | -| `model` | string | **Yes** | One of the IDs supplied via `MODEL_NAMES`. | -| `input` | string | array | **Yes** | A single text string _or_ list of texts to embed. | +| Field | Type | Required | Description | +| ------- | ------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------- | +| `model` | string | **Yes** | One of the IDs supplied via `MODEL_NAMES`. | +| `input` | string | array | **Yes** | Text string(s), image URL(s), base64 image(s), or mixed list. Automatically detects type. Order preserved for mixed inputs.| OpenAI route vs. Standard: @@ -146,34 +182,123 @@ Below are minimal `curl` snippets so you can copy-paste from any machine. > Replace `` with your endpoint ID and `` with a [RunPod API key](https://docs.runpod.io/get-started/api-keys). -### OpenAI-Compatible Calls +### List Models ```bash -# List models +# OpenAI-compatible format curl -H "Authorization: Bearer " \ https://api.runpod.ai/v2//openai/v1/models -# Create embeddings +# Standard RunPod format +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"input":{"openai_route":"/v1/models"}}' \ + https://api.runpod.ai/v2//runsync +``` + +### Text Embeddings + +```bash +# OpenAI-compatible format curl -X POST \ -H "Authorization: Bearer " \ -H "Content-Type: application/json" \ -d '{"model":"BAAI/bge-small-en-v1.5","input":"Hello world"}' \ https://api.runpod.ai/v2//openai/v1/embeddings + +# Standard RunPod format +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"input":{"model":"BAAI/bge-small-en-v1.5","input":"Hello world"}}' \ + https://api.runpod.ai/v2//runsync ``` -### Standard RunPod Calls +### Image Embeddings ```bash -# Create embeddings (wait for result) +# OpenAI-compatible format (image URL) curl -X POST \ + -H "Authorization: Bearer " \ -H "Content-Type: application/json" \ - -d '{"input":{"model":"BAAI/bge-small-en-v1.5","input":"Hello world"}}' \ + -d '{"model":"patrickjohncyh/fashion-clip","input":"https://example.com/image.jpg"}' \ + https://api.runpod.ai/v2//openai/v1/embeddings + +# Standard RunPod format (base64 image) +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{"input":{"model":"patrickjohncyh/fashion-clip","input":"data:image/png;base64,iVBORw0KG..."}}' \ https://api.runpod.ai/v2//runsync +``` + +### Mixed Text & Image Inputs + +```bash +# OpenAI-compatible format +curl -X POST \ + -H "Authorization: Bearer " \ + -H "Content-Type: application/json" \ + -d '{ + "model": "patrickjohncyh/fashion-clip", + "input": [ + "Hello, world!", + "https://example.com/image.jpg", + "Bye, world!" + ] + }' \ + https://api.runpod.ai/v2//openai/v1/embeddings + +# Standard RunPod format +curl -X POST \ + -H "Content-Type: application/json" \ + -d '{ + "input": { + "model": "patrickjohncyh/fashion-clip", + "input": [ + "Hello, world!", + "https://example.com/image.jpg", + "Bye, world!" 
+ ] + } + }' \ + https://api.runpod.ai/v2//runsync +``` + +**Note:** Output embeddings will be in the same order as inputs! + +### Reranking + +```bash +# OpenAI-compatible format +curl -X POST \ + -H "Authorization: Bearer " \ + -H "Content-Type: application/json" \ + -d '{ + "model": "BAAI/bge-reranker-large", + "query": "Which product has warranty coverage?", + "docs": [ + "Product A comes with a 2-year warranty", + "Product B is available in red and blue colors", + "All electronics include a standard 1-year warranty" + ], + "return_docs": true + }' \ + https://api.runpod.ai/v2//openai/v1/rerank -# Rerank +# Standard RunPod format curl -X POST \ -H "Content-Type: application/json" \ - -d '{"input":{"model":"BAAI/bge-reranker-large","query":"Which product has warranty coverage?","docs":["Product A comes with a 2-year warranty","Product B is available in red and blue colors","All electronics include a standard 1-year warranty"],"return_docs":true}}' \ + -d '{ + "input": { + "model": "BAAI/bge-reranker-large", + "query": "Which product has warranty coverage?", + "docs": [ + "Product A comes with a 2-year warranty", + "Product B is available in red and blue colors", + "All electronics include a standard 1-year warranty" + ], + "return_docs": true + } + }' \ https://api.runpod.ai/v2//runsync ``` From 267e95971e9d5e2d9c34f4c0d3af57d9bea6e2c8 Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 17 Nov 2025 16:06:47 +0300 Subject: [PATCH 07/27] local test modality ignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index b69f0a6..2ab3544 100644 --- a/.gitignore +++ b/.gitignore @@ -160,4 +160,5 @@ cython_debug/ #.idea/ -data \ No newline at end of file +data +test_modality.py \ No newline at end of file From 6aa6a3f8d323f76ba4c80934afebdcd8e089b773 Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 17 Nov 2025 16:07:07 +0300 Subject: [PATCH 08/27] add httpx dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index d82080a..3bb060a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ runpod~=1.7.0 infinity-emb[all]==0.0.77 optimum==1.24.0 einops # deployment of custom code with nomic +httpx>=0.27.0 git+https://github.com/pytorch-labs/float8_experimental.git From 6b66b7d032c169038a761eb1f18714635c7cb7ad Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 17 Nov 2025 16:09:32 +0300 Subject: [PATCH 09/27] updated tests --- .runpod/tests.json | 60 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/.runpod/tests.json b/.runpod/tests.json index ed0d318..40e0da3 100644 --- a/.runpod/tests.json +++ b/.runpod/tests.json @@ -1,7 +1,24 @@ { "tests": [ { - "name": "multimodal_text", + "name": "text_embedding_explicit_modality", + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": "A beautiful red dress", + "extra_body": { + "modality": "text" + } + } + }, + "expected_output": { + "status": "COMPLETED" + }, + "timeout": 10000 + }, + { + "name": "text_embedding_default_modality", "input": { "openai_route": "/v1/embeddings", "openai_input": { @@ -15,12 +32,15 @@ "timeout": 10000 }, { - "name": "multimodal_image_url", + "name": "image_url_embedding", "input": { "openai_route": "/v1/embeddings", "openai_input": { "model": "patrickjohncyh/fashion-clip", - "input": "https://images.unsplash.com/photo-1515372039744-b8f02a3ae446?w=400" + "input": 
"https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg", + "extra_body": { + "modality": "image" + } } }, "expected_output": { @@ -29,15 +49,18 @@ "timeout": 15000 }, { - "name": "multimodal_multiple_images", + "name": "multiple_images", "input": { "openai_route": "/v1/embeddings", "openai_input": { "model": "patrickjohncyh/fashion-clip", "input": [ - "https://images.unsplash.com/photo-1515372039744-b8f02a3ae446?w=400", - "https://images.unsplash.com/photo-1551028719-00167b16eac5?w=400" - ] + "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg", + "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg" + ], + "extra_body": { + "modality": "image" + } } }, "expected_output": { @@ -46,7 +69,7 @@ "timeout": 20000 }, { - "name": "multimodal_multiple_texts", + "name": "multiple_texts", "input": { "openai_route": "/v1/embeddings", "openai_input": { @@ -55,7 +78,10 @@ "A red dress", "A blue shirt", "Black shoes" - ] + ], + "extra_body": { + "modality": "text" + } } }, "expected_output": { @@ -64,23 +90,21 @@ "timeout": 15000 }, { - "name": "multimodal_mixed_text_and_images", + "name": "audio_not_implemented", "input": { "openai_route": "/v1/embeddings", "openai_input": { "model": "patrickjohncyh/fashion-clip", - "input": [ - "A red summer dress", - "https://images.unsplash.com/photo-1515372039744-b8f02a3ae446?w=400", - "A blue winter coat", - "https://images.unsplash.com/photo-1551028719-00167b16eac5?w=400" - ] + "input": "audio data", + "extra_body": { + "modality": "audio" + } } }, "expected_output": { - "status": "COMPLETED" + "status": "FAILED" }, - "timeout": 25000 + "timeout": 5000 }, { "name": "get_models_list", From d86b8ac822470bd60c13316a829c920907da9416 Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 17 Nov 2025 16:09:49 +0300 Subject: [PATCH 10/27] refactor: enhance embedding service with HTTP client and improved modality validation --- src/embedding_service.py | 197 ++++++++++++++++++++++----------------- 1 file changed, 112 insertions(+), 85 deletions(-) diff --git a/src/embedding_service.py b/src/embedding_service.py index 7f17e36..1946a42 100644 --- a/src/embedding_service.py +++ b/src/embedding_service.py @@ -6,7 +6,8 @@ from PIL import Image from config import EmbeddingServiceConfig -from src.multimodal_utils import parse_input_item +from http_client import create_http_client +from multimodal_utils import validate_item_for_modality from utils import ( ModelInfo, OpenAIModelInfo, @@ -38,18 +39,28 @@ def __init__(self): self.engine_array = AsyncEngineArray.from_args(engine_args) self.is_running = False self.sepamore = asyncio.Semaphore(1) + self.http_client = None async def start(self): """starts the engine background loop""" async with self.sepamore: if not self.is_running: await self.engine_array.astart() + if self.http_client is None: + self.http_client = create_http_client() + logger.info("Created persistent HTTP client for image downloads") + self.is_running = True async def stop(self): """stops the engine background loop""" async with self.sepamore: if self.is_running: + if self.http_client is not None: + await self.http_client.aclose() + self.http_client = None + logger.info("Closed HTTP client") + await self.engine_array.astop() self.is_running = False @@ -65,109 +76,125 @@ async def route_openai_get_embeddings( self, embedding_input: str | list[str] | list[str | bytes | Image.Image], model_name: str, + modality: str = "text", return_as_list: bool = False, ): """ - Returns embeddings for the input text and/or images. 
- Supports mixed text and image inputs while preserving order. - """ - if not self.is_running: - await self.start() + Returns embeddings for the input based on specified modality. - available_models = self.list_models() - if model_name not in available_models: - logger.error( - f"Requested model '{model_name}' not found. " - f"Available models: {available_models}" - ) - raise ValueError( - f"Model '{model_name}' is not available. " - f"Available models: {', '.join(available_models)}" - ) + Args: + embedding_input: Input text(s) or image(s) to embed + model_name: Name of the model to use + modality: Type of input - "text", "image", or "audio" (not yet implemented) + return_as_list: Whether to return results as a list - if not isinstance(embedding_input, list): - embedding_input = [embedding_input] + Raises: + ValueError: If model not available, modality invalid, or validation fails + NotImplementedError: If modality is "audio" + """ + try: + if not self.is_running: + await self.start() - parsed_items = await asyncio.gather( - *[parse_input_item(item) for item in embedding_input] - ) + available_models = self.list_models() + if model_name not in available_models: + logger.error( + f"Requested model '{model_name}' not found. " + f"Available models: {available_models}" + ) + raise ValueError( + f"Model '{model_name}' is not available. " + f"Available models: {', '.join(available_models)}" + ) - # Separate text and image items while tracking their original indices - text_items = [] - text_indices = [] - image_items = [] - image_indices = [] - - for idx, (item_type, processed_data) in enumerate(parsed_items): - if item_type == "text": - text_items.append(processed_data) - text_indices.append(idx) - else: # item_type == 'image' - image_items.append(processed_data) - image_indices.append(idx) - - logger.info( - f"Processing embeddings for model '{model_name}': " - f"{len(text_items)} text items, {len(image_items)} image items " - f"(total: {len(embedding_input)} items)" - ) + if not isinstance(embedding_input, list): + embedding_input = [embedding_input] - text_embeddings = [] - text_usage = 0 - image_embeddings = [] - image_usage = 0 + # Validate all items for the specified modality in parallel + try: + validated_items = await asyncio.gather( + *[ + validate_item_for_modality( + item, modality, idx, client=self.http_client + ) + for idx, item in enumerate(embedding_input) + ] + ) + except (ValueError, NotImplementedError) as e: + logger.error(f"Validation failed for modality '{modality}': {e}") + raise - if text_items: - logger.debug(f"Calling .embed() with {len(text_items)} text items") - text_embeddings, text_usage = await self.engine_array[model_name].embed( - text_items + logger.info( + f"Processing {len(validated_items)} {modality} items for model '{model_name}'" ) - logger.debug(f"Successfully got {len(text_embeddings)} text embeddings") - if image_items: - logger.debug(f"Calling .image_embed() with {len(image_items)} image items") - try: - image_embeddings, image_usage = await self.engine_array[ - model_name - ].image_embed(images=image_items) + # Route to appropriate embedding method based on modality + if modality == "text": + logger.debug(f"Calling .embed() with {len(validated_items)} text items") + embeddings, usage = await self.engine_array[model_name].embed( + validated_items + ) + logger.debug(f"Successfully got {len(embeddings)} text embeddings") + + elif modality == "image": logger.debug( - f"Successfully got {len(image_embeddings)} image embeddings" + f"Calling .image_embed() 
with {len(validated_items)} image items" ) - except ModelNotDeployedError as e: - error_msg = ( - f"Model '{model_name}' does not support image embeddings. " - f"Please use a multimodal model (e.g., 'jinaai/jina-clip-v1') " - f"or provide text-only input. Found {len(image_items)} image items in the request." + try: + embeddings, usage = await self.engine_array[model_name].image_embed( + images=validated_items + ) + logger.debug(f"Successfully got {len(embeddings)} image embeddings") + except ModelNotDeployedError as e: + error_msg = ( + f"Model '{model_name}' does not support image embeddings. " + f"Please use a multimodal model (e.g., 'jinaai/jina-clip-v1') " + f"or use modality='text' instead." + ) + logger.error(f"{error_msg} Original error: {e}") + raise ValueError(error_msg) from e + + elif modality == "audio": + raise NotImplementedError( + "Audio modality is not yet implemented. " + "Currently supported modalities: 'text', 'image'" ) - logger.error(f"{error_msg} Original error: {e}") - raise ValueError(error_msg) from e - # Merge embeddings back in original order - total_items = len(embedding_input) - ordered_embeddings: list = [None] * total_items - - for idx, embedding in zip(text_indices, text_embeddings): - ordered_embeddings[idx] = embedding - - for idx, embedding in zip(image_indices, image_embeddings): - ordered_embeddings[idx] = embedding - - total_usage = text_usage + image_usage + else: + raise ValueError( + f"Invalid modality: '{modality}'. " + f"Supported modalities: 'text', 'image', 'audio' (not yet implemented)" + ) - if return_as_list: - return [ - list_embeddings_to_response( - ordered_embeddings, + if return_as_list: + return [ + list_embeddings_to_response( + embeddings, + model=model_name, + usage=usage, # type: ignore[arg-type] + ) + ] + else: + return list_embeddings_to_response( + embeddings, model=model_name, - usage=total_usage, # type: ignore[arg-type] + usage=usage, # type: ignore[arg-type] ) - ] - else: - return list_embeddings_to_response( - ordered_embeddings, - model=model_name, - usage=total_usage, # type: ignore[arg-type] + + except (ValueError, NotImplementedError) as e: + logger.warning( + f"Expected error in route_openai_get_embeddings: {type(e).__name__}: {e}" + ) + raise + except Exception as e: + logger.exception( + f"Unexpected error in route_openai_get_embeddings: " + f"model='{model_name}', modality='{modality}', " + f"input_length={len(embedding_input) if isinstance(embedding_input, list) else 1}" ) + raise RuntimeError( + f"Internal error while processing embeddings: {type(e).__name__}: {str(e)}" + ) from e async def infinity_rerank( self, query: str, docs: str, return_docs: str, model_name: str From 9fe3f61965a08da2eae632cc3bcbfc363023e11f Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 17 Nov 2025 16:10:02 +0300 Subject: [PATCH 11/27] fix: add modality extraction for OpenAI embeddings handling --- src/handler.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/handler.py b/src/handler.py index 8f38e78..6b88701 100644 --- a/src/handler.py +++ b/src/handler.py @@ -2,6 +2,7 @@ from utils import create_error_response from typing import Any from embedding_service import EmbeddingService +from multimodal_utils import extract_modality # Gracefully catch configuration errors (e.g. missing env vars) so the user sees # a clean message instead of a full Python traceback when the container starts. 
@@ -25,16 +26,20 @@ async def async_generator_handler(job: dict[str, Any]): if openai_route and openai_route == "/v1/models": call_fn, kwargs = embedding_service.route_openai_models, {} elif openai_route and openai_route == "/v1/embeddings": - model_name = openai_input.get("model") if not openai_input: return create_error_response("Missing input").model_dump() + model_name = openai_input.get("model") if not model_name: return create_error_response( "Did not specify model in openai_input" ).model_dump() + + modality = extract_modality(job_input, openai_input) + call_fn, kwargs = embedding_service.route_openai_get_embeddings, { "embedding_input": openai_input.get("input"), "model_name": model_name, + "modality": modality, "return_as_list": True, } else: @@ -51,9 +56,11 @@ async def async_generator_handler(job: dict[str, Any]): "model_name": job_input.get("model"), } elif job_input.get("input"): + modality = extract_modality(job_input) call_fn, kwargs = embedding_service.route_openai_get_embeddings, { "embedding_input": job_input.get("input"), "model_name": job_input.get("model"), + "modality": modality, } else: return create_error_response(f"Invalid input: {job}").model_dump() From 802f465900cb7cd712f6fe19932aba3639307c02 Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 17 Nov 2025 16:10:10 +0300 Subject: [PATCH 12/27] feat: implement http client with configurable settings and user agent --- src/http_client.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 src/http_client.py diff --git a/src/http_client.py b/src/http_client.py new file mode 100644 index 0000000..58af57c --- /dev/null +++ b/src/http_client.py @@ -0,0 +1,34 @@ +import httpx + +DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + +DEFAULT_TIMEOUT = 10.0 +DEFAULT_MAX_CONNECTIONS = 50 +DEFAULT_MAX_KEEPALIVE_CONNECTIONS = 20 + + +def create_http_client() -> httpx.AsyncClient: + """ + Create a configured httpx.AsyncClient. + + Returns: + httpx.AsyncClient with proper timeout, limits, and headers configured. + """ + limits = httpx.Limits( + max_connections=DEFAULT_MAX_CONNECTIONS, + max_keepalive_connections=DEFAULT_MAX_KEEPALIVE_CONNECTIONS, + ) + + timeout = httpx.Timeout(DEFAULT_TIMEOUT) + + headers = { + "User-Agent": DEFAULT_USER_AGENT, + } + + return httpx.AsyncClient( + limits=limits, + timeout=timeout, + headers=headers, + follow_redirects=True, + trust_env=True, + ) From c96b6575d0510753fac23364acd8c6424465c904 Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 17 Nov 2025 16:10:22 +0300 Subject: [PATCH 13/27] feat: enhance image handling with RGB conversion, URL downloading, and base64 support --- src/multimodal_utils.py | 209 +++++++++++++++++++++++++++++++++++----- 1 file changed, 186 insertions(+), 23 deletions(-) diff --git a/src/multimodal_utils.py b/src/multimodal_utils.py index fe55477..e29a80b 100644 --- a/src/multimodal_utils.py +++ b/src/multimodal_utils.py @@ -1,19 +1,77 @@ import base64 import io +import logging import re +from typing import Any from PIL import Image +logger = logging.getLogger(__name__) + + +def _ensure_rgb(img: Image.Image) -> Image.Image: + """ + Ensure image is in RGB format (required for CLIP models). 
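+    Palette, RGBA, and grayscale inputs are converted; images already in
+    RGB pass through unchanged.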
+ + Args: + img: PIL Image in any mode + + Returns: + PIL Image in RGB mode + """ + if img.mode != 'RGB': + logger.debug(f"Converting image from {img.mode} to RGB") + return img.convert('RGB') + return img + def _is_url(text: str) -> bool: """Check if string is a URL""" return text.startswith('http://') or text.startswith('https://') -def _is_base64_image(data: str) -> tuple[bool, Image.Image | None]: +async def _download_image_from_url(url: str, client) -> Image.Image: + """ + Download image from URL using httpx with proper User-Agent and timeout. + + Args: + url: HTTP(S) URL to download image from + client: httpx.AsyncClient instance with configured timeout and limits + + Returns: + PIL.Image in RGB format + + Raises: + ValueError: If download fails or content is not a valid image + """ + try: + logger.debug(f"Downloading image from URL: {url}") + response = await client.get(url) + response.raise_for_status() # Raises exception for 4xx/5xx status codes + + img_bytes = response.content + logger.debug(f"Downloaded {len(img_bytes)} bytes from {url} (status: {response.status_code})") + except Exception as e: + raise ValueError(f"Failed to download image from URL: {type(e).__name__}: {e}") from e + + try: + img = Image.open(io.BytesIO(img_bytes)) + img.load() # Force load to validate it's a real image + logger.debug(f"Successfully loaded image from URL: {img.size} {img.mode}") + + return _ensure_rgb(img) + except Exception as e: + raise ValueError(f"Failed to decode image from URL: {type(e).__name__}: {e}") from e + + +def _is_base64_image(data: str) -> Image.Image | None: """ - Check if string is a base64-encoded image and decode it. + Try to decode string as base64-encoded image. Supports both data URI format (data:image/...) and raw base64. - Returns (is_image, PIL_Image or None) + + Returns: + PIL.Image in RGB format, or None if not a valid base64 image + + Note: Converts all images to RGB format for compatibility with multimodal models. """ try: # Handle data URI format: data:image/png;base64,iVBORw0KG... @@ -21,43 +79,148 @@ def _is_base64_image(data: str) -> tuple[bool, Image.Image | None]: match = re.match(r'data:image/[^;]+;base64,(.+)', data) if match: base64_data = match.group(1) + logger.debug(f"Matched data URI, extracted base64 data (length: {len(base64_data)})") else: - return False, None + logger.debug("data: URI does not match expected format") + return None else: # Try raw base64 base64_data = data + logger.debug("Treating as raw base64") img_bytes = base64.b64decode(base64_data) + logger.debug(f"Decoded base64 to {len(img_bytes)} bytes") img = Image.open(io.BytesIO(img_bytes)) img.load() # Force load to validate it's a real image - return True, img - except Exception: + logger.debug(f"Successfully loaded image: {img.size} {img.mode}") - return False, None + return _ensure_rgb(img) + except Exception as e: + logger.warning(f"Failed to decode base64 image: {type(e).__name__}: {e}") + return None + + +def validate_text_item(item: Any) -> str: + """ + Validate and convert item to text string. + Raises ValueError if item cannot be converted to text. 
+ """ + if isinstance(item, str): + return item + + # Try to convert to string + try: + return str(item) + except Exception as e: + raise ValueError(f"Cannot convert item to text: {type(item).__name__}") from e -async def parse_input_item(item: str | bytes | Image.Image) -> tuple[str, str | Image.Image | bytes]: +async def validate_image_item(item: Any, client=None) -> Image.Image: """ - Parse a single input item and determine if it's text or image. - Returns: (type, processed_data) - where type is 'text' or 'image' - and processed_data is either the original text or PIL.Image/URL/bytes + Validate and process image item. + Accepts: PIL.Image, bytes, URL string, or base64 string. + Returns PIL.Image in RGB format. + Raises ValueError if item is not a valid image format. - Note: infinity_emb handles URL downloading internally, so we just pass URLs through. + Args: + item: The image item to validate (PIL.Image, bytes, URL string, or base64 string) + client: Optional httpx.AsyncClient for downloading URL images with timeout and User-Agent + + Note: All images are converted to RGB format for CLIP compatibility. """ - # Check if it's an image - if isinstance(item, (Image.Image, bytes)): - return 'image', item + # PIL Image - convert to RGB if needed + if isinstance(item, Image.Image): + return _ensure_rgb(item) + + # Bytes - decode to PIL Image + if isinstance(item, bytes): + try: + img = Image.open(io.BytesIO(item)) + img.load() + logger.debug(f"Loaded image from bytes: {img.size} {img.mode}") + return _ensure_rgb(img) + except Exception as e: + raise ValueError(f"Failed to decode image from bytes: {type(e).__name__}: {e}") from e if isinstance(item, str): - # URL images + # URL images - download with proper User-Agent and timeout if _is_url(item): - return 'image', item + if client is None: + raise ValueError("HTTP client required for downloading images from URLs") + return await _download_image_from_url(item, client) + + # Base64 images - decode and validate (already converted to RGB) + img = _is_base64_image(item) + if img is not None: + return img - # Base64 images - is_base64_img, img = _is_base64_image(item) - if is_base64_img and img is not None: - return 'image', img + # Not a valid image format + raise ValueError( + "String is not a valid image format (must be URL starting with http:// or https://, " + "or base64-encoded image with 'data:image/...' prefix)" + ) + + raise ValueError( + f"Invalid image type: {type(item).__name__}. " + f"Expected PIL.Image, bytes, URL string, or base64 string." + ) + + +async def validate_item_for_modality(item: Any, modality: str, index: int, client=None) -> Any: + """ + Validate a single item for the specified modality. + + Args: + item: The input item to validate + modality: One of "text", "image", "audio" + index: The index of the item in the batch (for error messages) + client: Optional httpx.AsyncClient for downloading URL images with timeout + + Returns: + Validated and processed item suitable for infinity_emb + + Raises: + ValueError: If item is not valid for the specified modality + NotImplementedError: If modality is "audio" + """ + try: + if modality == "text": + return validate_text_item(item) + elif modality == "image": + return await validate_image_item(item, client=client) + elif modality == "audio": + raise NotImplementedError( + "Audio modality is not yet implemented. " + "Currently supported modalities: 'text', 'image'" + ) + else: + raise ValueError( + f"Invalid modality: '{modality}'. 
" + f"Supported modalities: 'text', 'image', 'audio' (not yet implemented)" + ) + except (ValueError, NotImplementedError) as e: + # Re-raise with index information for better error messages + raise type(e)(f"Item at index {index}: {str(e)}") from e + + +def extract_modality(job_input: dict[str, Any], openai_input: dict[str, Any] | None = None) -> str: + """ + Extract modality parameter from job input. + Checks extra_body (OpenAI format) first, then direct input field. + Defaults to "text" for backward compatibility. - return 'text', item if isinstance(item, str) else str(item) + Args: + job_input: The main job input dictionary + openai_input: Optional OpenAI-format input dictionary + + Returns: + Modality string: "text", "image", or "audio" + """ + if openai_input: + # OpenAI format: check extra_body + extra_body = openai_input.get("extra_body", {}) + return extra_body.get("modality", "text") + else: + # Direct format: check top-level modality field + return job_input.get("modality", "text") \ No newline at end of file From 9db8204d216d60e462944b436c184db0ede0c115 Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 17 Nov 2025 23:13:37 +0300 Subject: [PATCH 14/27] remove extract_modality function and simplify modality extraction in handler --- src/handler.py | 5 ++--- src/multimodal_utils.py | 24 +----------------------- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/src/handler.py b/src/handler.py index 6b88701..0d43789 100644 --- a/src/handler.py +++ b/src/handler.py @@ -2,7 +2,6 @@ from utils import create_error_response from typing import Any from embedding_service import EmbeddingService -from multimodal_utils import extract_modality # Gracefully catch configuration errors (e.g. missing env vars) so the user sees # a clean message instead of a full Python traceback when the container starts. @@ -34,7 +33,7 @@ async def async_generator_handler(job: dict[str, Any]): "Did not specify model in openai_input" ).model_dump() - modality = extract_modality(job_input, openai_input) + modality = openai_input.get("modality", "text") call_fn, kwargs = embedding_service.route_openai_get_embeddings, { "embedding_input": openai_input.get("input"), @@ -56,7 +55,7 @@ async def async_generator_handler(job: dict[str, Any]): "model_name": job_input.get("model"), } elif job_input.get("input"): - modality = extract_modality(job_input) + modality = job_input.get("modality", "text") call_fn, kwargs = embedding_service.route_openai_get_embeddings, { "embedding_input": job_input.get("input"), "model_name": job_input.get("model"), diff --git a/src/multimodal_utils.py b/src/multimodal_utils.py index e29a80b..53a8b28 100644 --- a/src/multimodal_utils.py +++ b/src/multimodal_utils.py @@ -46,7 +46,7 @@ async def _download_image_from_url(url: str, client) -> Image.Image: try: logger.debug(f"Downloading image from URL: {url}") response = await client.get(url) - response.raise_for_status() # Raises exception for 4xx/5xx status codes + response.raise_for_status() img_bytes = response.content logger.debug(f"Downloaded {len(img_bytes)} bytes from {url} (status: {response.status_code})") @@ -202,25 +202,3 @@ async def validate_item_for_modality(item: Any, modality: str, index: int, clien except (ValueError, NotImplementedError) as e: # Re-raise with index information for better error messages raise type(e)(f"Item at index {index}: {str(e)}") from e - - -def extract_modality(job_input: dict[str, Any], openai_input: dict[str, Any] | None = None) -> str: - """ - Extract modality parameter from job input. 
- Checks extra_body (OpenAI format) first, then direct input field. - Defaults to "text" for backward compatibility. - - Args: - job_input: The main job input dictionary - openai_input: Optional OpenAI-format input dictionary - - Returns: - Modality string: "text", "image", or "audio" - """ - if openai_input: - # OpenAI format: check extra_body - extra_body = openai_input.get("extra_body", {}) - return extra_body.get("modality", "text") - else: - # Direct format: check top-level modality field - return job_input.get("modality", "text") \ No newline at end of file From 3ba1f1564ba648585cf75a5327e5e21c9e2756d3 Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 17 Nov 2025 23:20:17 +0300 Subject: [PATCH 15/27] fix: update CMD syntax in Dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 0155c94..f99387f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,4 +29,4 @@ ADD src . COPY test_input.json /test_input.json # start the handler -CMD python -u /handler.py +CMD ["python", "-u", "/handler.py"] From f61a9cc4769230396f0dc94c57609fcec713bb1e Mon Sep 17 00:00:00 2001 From: vadim Date: Sat, 29 Nov 2025 16:17:18 +0300 Subject: [PATCH 16/27] docs: update README to clarify multimodal support and explicit modality selection --- README.md | 88 ++++++++++++++++++++----------------------------------- 1 file changed, 32 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index c799f69..d0bedc6 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ High-throughput, OpenAI-compatible **text & image embedding** & reranker powered by [Infinity](https://github.com/michaelfeil/infinity) -**✨ New: Multimodal Support!** Now supports text, images (URLs & base64), and mixed inputs. +**✨ New: Multimodal Support!** Now supports text and image embeddings (URLs & base64) with an explicit `modality` switch per request. --- @@ -23,8 +23,7 @@ High-throughput, OpenAI-compatible **text & image embedding** & reranker powered 1. [List Models](#list-models-1) 2. [Text Embeddings](#text-embeddings) 3. [Image Embeddings](#image-embeddings) - 4. [Mixed Text & Image Inputs](#mixed-text--image-inputs) - 5. [Reranking](#reranking) + 4. [Reranking](#reranking) 6. [Testing](#testing) 7. [Further Documentation](#further-documentation) 8. [Acknowledgements](#acknowledgements) @@ -42,19 +41,28 @@ High-throughput, OpenAI-compatible **text & image embedding** & reranker powered ## Multimodal Features -### Supported Input Types +### Supported Modalities - ✅ **Text** – traditional text embeddings - ✅ **Image URLs** – `http://` or `https://` links to images (`.jpg`, `.png`, `.gif`, etc.) 
- ✅ **Base64 Images** – data URI format (`data:image/png;base64,...`) -- ✅ **Mixed Inputs** – combine text and images in a single request (order preserved) -### Automatic Type Detection +Each request targets a single modality: -The worker automatically detects whether your input is text or an image: -- URLs ending with image extensions → processed as images -- Data URI with `data:image/...;base64` → decoded and processed as images -- Everything else → processed as text +| Modality | How to request | Notes | +| -------- | ------------------------------------------------ | ------------------------------------------------- | +| `text` | Default; or set `modality="text"` | Works with any deployed embedding model | +| `image` | Set `modality="image"` | Requires a multimodal model (see below) | +| `audio` | Planned | Returns a clear `NotImplementedError` for now | + +> **Tip:** For OpenAI-compatible requests, include `"modality": "…"` alongside `model` and `input`. For native `/runsync` requests, pass `modality` inside the `input` object. If omitted, the worker assumes `text`. + +### Explicit Modality Selection + +- The previous automatic detector has been replaced with an explicit `modality` flag so you stay in control of routing. +- All inputs are validated eagerly for the chosen modality with detailed, index-aware error messages. +- Image downloads run through a shared `httpx.AsyncClient` with tuned keep-alive limits, timeouts, and a desktop browser User-Agent—improving compatibility with CDNs that block generic clients. +- If you configure `extra_body` via an OpenAI SDK, the `modality` field still arrives at the server top-level—no additional parsing is required. ### Multimodal Models @@ -116,17 +124,18 @@ Except for transport (path + wrapper object) the JSON you send/receive is identi #### Request Fields (shared) -| Field | Type | Required | Description | -| ------- | ------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------- | -| `model` | string | **Yes** | One of the IDs supplied via `MODEL_NAMES`. | -| `input` | string | array | **Yes** | Text string(s), image URL(s), base64 image(s), or mixed list. Automatically detects type. Order preserved for mixed inputs.| +| Field | Type | Required | Description | +| ---------- | ------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------- | +| `model` | string | **Yes** | One of the IDs supplied via `MODEL_NAMES`. | +| `input` | string | array | **Yes** | Text string(s) or image URL/base64 list matching the selected modality. Order is preserved. | +| `modality` | string | No | Required for images. Accepts `text` (default) or `image`. For OpenAI requests supply via `extra_body.modality`. | OpenAI route vs. 
Standard: -| Flavour | Method | Path | Body | -| -------- | ------ | ---------------- | --------------------------------------------- | -| OpenAI | `POST` | `/v1/embeddings` | `{ "model": "…", "input": "…" }` | -| Standard | `POST` | `/runsync` | `{ "input": { "model": "…", "input": "…" } }` | +| Flavour | Method | Path | Body | +| -------- | ------ | ---------------- | ---------------------------------------------------------------------- | +| OpenAI | `POST` | `/v1/embeddings` | `{ "model": "…", "input": "…", "modality": "text" }` (modality optional for text) | +| Standard | `POST` | `/runsync` | `{ "input": { "model": "…", "input": "…", "modality": "text" } }` | #### Response (both flavours) @@ -203,13 +212,13 @@ curl -X POST \ curl -X POST \ -H "Authorization: Bearer " \ -H "Content-Type: application/json" \ - -d '{"model":"BAAI/bge-small-en-v1.5","input":"Hello world"}' \ + -d '{"model":"BAAI/bge-small-en-v1.5","input":"Hello world","modality":"text"}' \ https://api.runpod.ai/v2//openai/v1/embeddings # Standard RunPod format curl -X POST \ -H "Content-Type: application/json" \ - -d '{"input":{"model":"BAAI/bge-small-en-v1.5","input":"Hello world"}}' \ + -d '{"input":{"model":"BAAI/bge-small-en-v1.5","input":"Hello world","modality":"text"}}' \ https://api.runpod.ai/v2//runsync ``` @@ -220,50 +229,17 @@ curl -X POST \ curl -X POST \ -H "Authorization: Bearer " \ -H "Content-Type: application/json" \ - -d '{"model":"patrickjohncyh/fashion-clip","input":"https://example.com/image.jpg"}' \ + -d '{"model":"patrickjohncyh/fashion-clip","input":"https://example.com/image.jpg","modality":"image"}' \ https://api.runpod.ai/v2//openai/v1/embeddings # Standard RunPod format (base64 image) curl -X POST \ -H "Content-Type: application/json" \ - -d '{"input":{"model":"patrickjohncyh/fashion-clip","input":"data:image/png;base64,iVBORw0KG..."}}' \ - https://api.runpod.ai/v2//runsync -``` - -### Mixed Text & Image Inputs - -```bash -# OpenAI-compatible format -curl -X POST \ - -H "Authorization: Bearer " \ - -H "Content-Type: application/json" \ - -d '{ - "model": "patrickjohncyh/fashion-clip", - "input": [ - "Hello, world!", - "https://example.com/image.jpg", - "Bye, world!" - ] - }' \ - https://api.runpod.ai/v2//openai/v1/embeddings - -# Standard RunPod format -curl -X POST \ - -H "Content-Type: application/json" \ - -d '{ - "input": { - "model": "patrickjohncyh/fashion-clip", - "input": [ - "Hello, world!", - "https://example.com/image.jpg", - "Bye, world!" - ] - } - }' \ + -d '{"input":{"model":"patrickjohncyh/fashion-clip","input":"data:image/png;base64,iVBORw0KG...","modality":"image"}}' \ https://api.runpod.ai/v2//runsync ``` -**Note:** Output embeddings will be in the same order as inputs! +> **Note:** Send one request per modality. If you need both text and image embeddings, issue two calls so each payload is validated consistently. 
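
If you call the endpoint through the OpenAI Python SDK instead of `curl`, a minimal sketch looks like the following; the endpoint ID, API key, and image URL are placeholders for your own deployment, and (as noted above) `extra_body` fields are merged into the top-level request JSON, so the worker receives `modality` alongside `model` and `input`.

```python
# Minimal sketch using the OpenAI Python SDK; <endpoint_id>, <api_key>,
# and the image URL are placeholders for your own deployment.
from openai import OpenAI

client = OpenAI(
    base_url="https://api.runpod.ai/v2/<endpoint_id>/openai/v1",
    api_key="<api_key>",
)

# extra_body is merged into the top-level request JSON, so the worker
# receives {"model": ..., "input": [...], "modality": "image"}.
response = client.embeddings.create(
    model="patrickjohncyh/fashion-clip",
    input=["https://example.com/image.jpg"],
    extra_body={"modality": "image"},
)

print(len(response.data[0].embedding))
```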
### Reranking From d8c8292530ac561d889878f80e37a46e4ad83d82 Mon Sep 17 00:00:00 2001 From: vadim Date: Sat, 29 Nov 2025 22:12:35 +0300 Subject: [PATCH 17/27] fix: remove test_modality.py from .gitignore --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 2ab3544..b69f0a6 100644 --- a/.gitignore +++ b/.gitignore @@ -160,5 +160,4 @@ cython_debug/ #.idea/ -data -test_modality.py \ No newline at end of file +data \ No newline at end of file From 96a9afb71618797c7718f04f9c516be98ae654f7 Mon Sep 17 00:00:00 2001 From: vadim Date: Sat, 29 Nov 2025 22:13:20 +0300 Subject: [PATCH 18/27] docs: update README to reflect explicit modality selection --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index d0bedc6..d9a3f13 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,6 @@ Each request targets a single modality: ### Explicit Modality Selection -- The previous automatic detector has been replaced with an explicit `modality` flag so you stay in control of routing. - All inputs are validated eagerly for the chosen modality with detailed, index-aware error messages. - Image downloads run through a shared `httpx.AsyncClient` with tuned keep-alive limits, timeouts, and a desktop browser User-Agent—improving compatibility with CDNs that block generic clients. - If you configure `extra_body` via an OpenAI SDK, the `modality` field still arrives at the server top-level—no additional parsing is required. From 2e965d77f7626c848e94a24f490e237848e0267b Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 1 Dec 2025 14:23:37 +0300 Subject: [PATCH 19/27] test: add integration tests for explicit modality API using pytest --- tests/test_modality.py | 442 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 442 insertions(+) create mode 100644 tests/test_modality.py diff --git a/tests/test_modality.py b/tests/test_modality.py new file mode 100644 index 0000000..a776386 --- /dev/null +++ b/tests/test_modality.py @@ -0,0 +1,442 @@ +"""Integration tests for the explicit modality API using pytest.""" + +import base64 +import io +from collections.abc import Iterable, Mapping +from typing import Any + +import pytest +import requests +from PIL import Image + +BASE_URL = "http://localhost:8000" +RUNSYNC_URL = f"{BASE_URL}/runsync" +DEFAULT_TIMEOUT_SECONDS = 30 + + +def _post_runsync( + payload: Mapping[str, Any], timeout: int | float = DEFAULT_TIMEOUT_SECONDS +) -> requests.Response: + """Send a request to the worker and print useful diagnostics.""" + response = requests.post(RUNSYNC_URL, json=payload, timeout=timeout) + print(f"POST {RUNSYNC_URL} -> {response.status_code}") + return response + + +def _extract_output(result: Mapping[str, Any]) -> Mapping[str, Any] | None: + """Normalise RunPod /runsync output into a single mapping.""" + output = result.get("output") + if isinstance(output, list) and output: + return output[0] + if isinstance(output, Mapping): + return output + return None + + +def _extract_error_message(result: Mapping[str, Any]) -> str | None: + output = _extract_output(result) + if output and output.get("object") == "error": + message = output.get("message") + return str(message) if message is not None else None + return None + + +def generate_red_square_base64() -> str: + """Generate a 5x5 red square PNG encoded as a data URI.""" + img = Image.new("RGB", (5, 5), color=(255, 0, 0)) + buffer = io.BytesIO() + img.save(buffer, format="PNG") + buffer.seek(0) + img_base64 = 
base64.b64encode(buffer.read()).decode("utf-8") + return f"data:image/png;base64,{img_base64}" + + +@pytest.mark.parametrize( + "name,payload,expected_count", + [ + ( + "text modality", + { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": ["Hello world", "How are you?"], + "modality": "text", + }, + } + }, + 2, + ), + ( + "image modality (url)", + { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": [ + "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg" + ], + "modality": "image", + }, + } + }, + 1, + ), + ( + "image modality (base64)", + { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": [generate_red_square_base64()], + "modality": "image", + }, + } + }, + 1, + ), + ( + "default text modality", + { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": ["Default modality test"], + }, + } + }, + 1, + ), + ], +) +def test_modality_success( + name: str, payload: Mapping[str, Any], expected_count: int +) -> None: + response = _post_runsync(payload) + assert response.status_code == 200, f"HTTP error for {name}: {response.text}" + + result = response.json() + assert result.get("status") == "COMPLETED", ( + f"Unexpected status for {name}: {result}" + ) + + output = _extract_output(result) or {} + data = output.get("data", []) + assert isinstance(data, Iterable), f"Missing data for {name}: {output}" + + if isinstance(data, list): + assert len(data) == expected_count, ( + f"Expected {expected_count} embeddings for {name}, got {len(data)}" + ) + else: + pytest.fail(f"Unexpected data format for {name}: {type(data)}") + + +def test_wrong_modality_error() -> None: + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": [ + "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg" + ], + "modality": "image", + }, + } + } + + response = _post_runsync(payload) + if response.status_code != 200: + assert response.status_code >= 400 + return + + result = response.json() + error_msg = _extract_error_message(result) + assert error_msg is not None, f"Expected error object, got: {result}" + + +def test_audio_not_implemented() -> None: + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": ["audio data"], + "modality": "audio", + }, + } + } + + response = _post_runsync(payload) + if response.status_code != 200: + assert response.status_code >= 400 + return + + result = response.json() + error_msg = _extract_error_message(result) + assert error_msg is not None, f"Expected NotImplementedError output, got: {result}" + assert "not yet implemented" in error_msg.lower() + + +def test_validation_flexibility() -> None: + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": ["https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg"], + "modality": "text", + }, + } + } + + response = _post_runsync(payload) + assert response.status_code == 200, f"HTTP error: {response.text}" + + result = response.json() + assert result.get("status") == "COMPLETED", ( + f"Expected success treating URL as text: {result}" + ) + + +@pytest.mark.parametrize( + "payload,expected_count", + [ + ( + { + "input": { 
+ "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": [], + "modality": "text", + }, + } + }, + 0, + ), + ( + { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": "Single text string", + "modality": "text", + }, + } + }, + 1, + ), + ], +) +def test_text_edge_cases(payload: Mapping[str, Any], expected_count: int) -> None: + response = _post_runsync(payload) + if response.status_code != 200: + assert response.status_code >= 400 + return + + result = response.json() + if result.get("status") != "COMPLETED": + assert _extract_error_message(result) is not None + return + + output = _extract_output(result) or {} + data = output.get("data", []) + assert isinstance(data, list) + assert len(data) == expected_count + + +def test_edge_very_long_text() -> None: + long_text = "This is a test sentence. " * 200 + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": long_text, + "modality": "text", + }, + } + } + + response = _post_runsync(payload) + assert response.status_code == 200, f"HTTP error: {response.text}" + + result = response.json() + assert result.get("status") == "COMPLETED", f"Unexpected status: {result}" + + +def test_edge_empty_string() -> None: + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": "", + "modality": "text", + }, + } + } + + response = _post_runsync(payload) + if response.status_code != 200: + assert response.status_code >= 400 + return + + result = response.json() + if result.get("status") == "COMPLETED": + output = _extract_output(result) or {} + data = output.get("data", []) + assert isinstance(data, list) + else: + assert _extract_error_message(result) is not None + + +def test_edge_invalid_modality() -> None: + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": "test", + "modality": "video", + }, + } + } + + response = _post_runsync(payload) + if response.status_code != 200: + assert response.status_code >= 400 + return + + result = response.json() + error_msg = _extract_error_message(result) + assert error_msg is not None, f"Expected invalid modality error: {result}" + assert "invalid modality" in error_msg.lower() + + +def test_edge_missing_model() -> None: + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "input": "test", + "modality": "text", + }, + } + } + + response = _post_runsync(payload) + if response.status_code != 200: + assert response.status_code >= 400 + return + + result = response.json() + error_msg = _extract_error_message(result) + assert error_msg is not None, f"Expected missing model error: {result}" + + +def test_edge_nonexistent_model() -> None: + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "nonexistent/model-12345", + "input": "test", + "modality": "text", + }, + } + } + + response = _post_runsync(payload) + if response.status_code != 200: + assert response.status_code >= 400 + return + + result = response.json() + error_msg = _extract_error_message(result) + assert error_msg is not None, f"Expected model missing error: {result}" + assert "not available" in error_msg.lower() or "not found" in error_msg.lower() + + +def test_edge_invalid_image_url() -> None: + payload = { + "input": { + "openai_route": "/v1/embeddings", + 
"openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": "https://example.com/nonexistent-image-12345.jpg", + "modality": "image", + }, + } + } + + response = _post_runsync(payload) + if response.status_code != 200: + assert response.status_code >= 400 + return + + result = response.json() + assert result.get("status") in {"COMPLETED", "FAILED"}, ( + f"Unexpected status: {result}" + ) + + +def test_edge_special_characters() -> None: + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "BAAI/bge-small-en-v1.5", + "input": "Hello 世界! 🌍 Special chars: @#$%^&*()_+-=[]{}|;:',.<>?/~`", + "modality": "text", + }, + } + } + + response = _post_runsync(payload) + assert response.status_code == 200, f"HTTP error: {response.text}" + + result = response.json() + assert result.get("status") == "COMPLETED", f"Unexpected status: {result}" + + +def test_edge_corrupted_base64() -> None: + invalid_payloads = [ + "data:image/png;base64,NotValidBase64Data!!!", + "data:image/png;base64,iVBORw0KGgoAAAA", + "data:image/jpeg;base64,/9j/4AAQSkZJRg", + "data:image/png;base64,SGVsbG8gV29ybGQh", + ] + + for idx, invalid_base64 in enumerate(invalid_payloads, start=1): + print( + f"\n Test variant {idx}/{len(invalid_payloads)}: {invalid_base64[:50]}..." + ) + payload = { + "input": { + "openai_route": "/v1/embeddings", + "openai_input": { + "model": "patrickjohncyh/fashion-clip", + "input": invalid_base64, + "modality": "image", + }, + } + } + + response = _post_runsync(payload) + if response.status_code != 200: + assert response.status_code >= 400 + continue + + result = response.json() + error_msg = _extract_error_message(result) + assert error_msg is not None, f"Expected error for corrupted base64: {result}" + + print("\n✓ All corrupted base64 variants handled without crash") From abde653a3e8d4fb5457c76e514e82f640240ffb6 Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 1 Dec 2025 14:26:19 +0300 Subject: [PATCH 20/27] docs: reorder sections in README for improved clarity --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d9a3f13..d2839ae 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,8 @@ High-throughput, OpenAI-compatible **text & image embedding** & reranker powered 2. [Text Embeddings](#text-embeddings) 3. [Image Embeddings](#image-embeddings) 4. [Reranking](#reranking) -6. [Testing](#testing) -7. [Further Documentation](#further-documentation) -8. [Acknowledgements](#acknowledgements) +6. [Further Documentation](#further-documentation) +7. [Acknowledgements](#acknowledgements) --- From a84f8cff083d3b763848f15336c5c8a76a34df3e Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 1 Dec 2025 14:32:39 +0300 Subject: [PATCH 21/27] docs: update section on modality selection to validation and image fetching defaults --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d2839ae..f5b3bfb 100644 --- a/README.md +++ b/README.md @@ -56,11 +56,10 @@ Each request targets a single modality: > **Tip:** For OpenAI-compatible requests, include `"modality": "…"` alongside `model` and `input`. For native `/runsync` requests, pass `modality` inside the `input` object. If omitted, the worker assumes `text`. -### Explicit Modality Selection +### Validation & Image Fetching Defaults - All inputs are validated eagerly for the chosen modality with detailed, index-aware error messages. 
-- Image downloads run through a shared `httpx.AsyncClient` with tuned keep-alive limits, timeouts, and a desktop browser User-Agent—improving compatibility with CDNs that block generic clients. -- If you configure `extra_body` via an OpenAI SDK, the `modality` field still arrives at the server top-level—no additional parsing is required. +- Image downloads run through a shared `httpx.AsyncClient` with tuned keep-alive limits, timeouts, and a desktop browser User-Agent—improving compatibility with CDNs that block generic clients. All of these knobs can be overridden using the `HTTP_CLIENT_*` environment variables listed below. ### Multimodal Models @@ -85,6 +84,10 @@ All behaviour is controlled through environment variables: | `DTYPES` | No | `auto` | Precision per model (`auto`, `fp16`, `fp8`). Semicolon-separated, must match `MODEL_NAMES`.
Example: `auto;auto` | | `INFINITY_QUEUE_SIZE` | No | `48000` | Max items queueable inside the Infinity engine. | | `RUNPOD_MAX_CONCURRENCY` | No | `300` | Max concurrent requests the RunPod wrapper will accept. | +| `HTTP_CLIENT_USER_AGENT` | No | Desktop Chrome UA | Override the browser-style User-Agent used for outbound image downloads. | +| `HTTP_CLIENT_TIMEOUT` | No | `10.0` | Request timeout (seconds) for outbound image fetches. | +| `HTTP_CLIENT_MAX_CONNECTIONS` | No | `50` | Concurrent connection pool size for the shared `httpx` client. | +| `HTTP_CLIENT_MAX_KEEPALIVE_CONNECTIONS` | No | `20` | Max keep-alive sockets retained by the shared `httpx` client. | --- From 6101244a4e7c75ca16e6fd772f1d9dcd9542a198 Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 1 Dec 2025 14:32:50 +0300 Subject: [PATCH 22/27] fix: streamline modality specification in test cases by removing extra_body --- .runpod/tests.json | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/.runpod/tests.json b/.runpod/tests.json index 40e0da3..114e67f 100644 --- a/.runpod/tests.json +++ b/.runpod/tests.json @@ -7,9 +7,7 @@ "openai_input": { "model": "patrickjohncyh/fashion-clip", "input": "A beautiful red dress", - "extra_body": { - "modality": "text" - } + "modality": "text" } }, "expected_output": { @@ -38,9 +36,7 @@ "openai_input": { "model": "patrickjohncyh/fashion-clip", "input": "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg", - "extra_body": { - "modality": "image" - } + "modality": "image" } }, "expected_output": { @@ -58,9 +54,7 @@ "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg", "https://raw.githubusercontent.com/pytorch/hub/master/images/dog.jpg" ], - "extra_body": { - "modality": "image" - } + "modality": "image" } }, "expected_output": { @@ -79,9 +73,7 @@ "A blue shirt", "Black shoes" ], - "extra_body": { - "modality": "text" - } + "modality": "text" } }, "expected_output": { @@ -96,9 +88,7 @@ "openai_input": { "model": "patrickjohncyh/fashion-clip", "input": "audio data", - "extra_body": { - "modality": "audio" - } + "modality": "audio" } }, "expected_output": { From bcfcc1810c2936c7c9c23a06f52e77ac53670c0a Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 1 Dec 2025 14:33:10 +0300 Subject: [PATCH 23/27] refactor: centralize HTTP client configuration using HttpClientConfig --- src/config.py | 67 ++++++++++++++++++++++++++++++++++++++++++++++ src/http_client.py | 20 +++++++------- 2 files changed, 76 insertions(+), 11 deletions(-) diff --git a/src/config.py b/src/config.py index 6266ed5..bfa28cd 100644 --- a/src/config.py +++ b/src/config.py @@ -1,10 +1,20 @@ import os from dotenv import load_dotenv from functools import cached_property +from typing import Optional DEFAULT_BATCH_SIZE = 32 DEFAULT_BACKEND = "torch" +DEFAULT_HTTP_CLIENT_USER_AGENT = ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/120.0.0.0 Safari/537.36" +) +DEFAULT_HTTP_CLIENT_TIMEOUT_SECONDS = 10.0 +DEFAULT_HTTP_CLIENT_MAX_CONNECTIONS = 50 +DEFAULT_HTTP_CLIENT_MAX_KEEPALIVE_CONNECTIONS = 20 + if not os.environ.get("INFINITY_QUEUE_SIZE"): # how many items can be in the queue os.environ["INFINITY_QUEUE_SIZE"] = "48000" @@ -56,3 +66,60 @@ def dtypes(self) -> list[str]: @cached_property def runpod_max_concurrency(self) -> int: return int(os.environ.get("RUNPOD_MAX_CONCURRENCY", 300)) + + +class HttpClientConfig: + ENV_USER_AGENT = "HTTP_CLIENT_USER_AGENT" + ENV_TIMEOUT = "HTTP_CLIENT_TIMEOUT" + ENV_MAX_CONNECTIONS = 
"HTTP_CLIENT_MAX_CONNECTIONS" + ENV_MAX_KEEPALIVE = "HTTP_CLIENT_MAX_KEEPALIVE_CONNECTIONS" + + def __init__(self): + load_dotenv() + + def _get_env_float(self, key: str, default: float) -> float: + value = os.environ.get(key) + if value is None: + return default + try: + return float(value) + except ValueError as exc: + raise ValueError(f"Environment variable {key} must be a float, got {value!r}") from exc + + def _get_env_int(self, key: str, default: int) -> int: + value = os.environ.get(key) + if value is None: + return default + try: + return int(value) + except ValueError as exc: + raise ValueError(f"Environment variable {key} must be an integer, got {value!r}") from exc + + def _get_env_str(self, key: str, default: str) -> str: + value: Optional[str] = os.environ.get(key) + if value is None: + return default + value = value.strip() + return value or default + + @cached_property + def user_agent(self) -> str: + return self._get_env_str(self.ENV_USER_AGENT, DEFAULT_HTTP_CLIENT_USER_AGENT) + + @cached_property + def timeout_seconds(self) -> float: + return self._get_env_float(self.ENV_TIMEOUT, DEFAULT_HTTP_CLIENT_TIMEOUT_SECONDS) + + @cached_property + def max_connections(self) -> int: + return self._get_env_int( + self.ENV_MAX_CONNECTIONS, + DEFAULT_HTTP_CLIENT_MAX_CONNECTIONS, + ) + + @cached_property + def max_keepalive_connections(self) -> int: + return self._get_env_int( + self.ENV_MAX_KEEPALIVE, + DEFAULT_HTTP_CLIENT_MAX_KEEPALIVE_CONNECTIONS, + ) diff --git a/src/http_client.py b/src/http_client.py index 58af57c..dc01a8b 100644 --- a/src/http_client.py +++ b/src/http_client.py @@ -1,10 +1,8 @@ import httpx -DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" +from config import HttpClientConfig -DEFAULT_TIMEOUT = 10.0 -DEFAULT_MAX_CONNECTIONS = 50 -DEFAULT_MAX_KEEPALIVE_CONNECTIONS = 20 +_CLIENT_CONFIG = HttpClientConfig() def create_http_client() -> httpx.AsyncClient: @@ -15,16 +13,16 @@ def create_http_client() -> httpx.AsyncClient: httpx.AsyncClient with proper timeout, limits, and headers configured. 
""" limits = httpx.Limits( - max_connections=DEFAULT_MAX_CONNECTIONS, - max_keepalive_connections=DEFAULT_MAX_KEEPALIVE_CONNECTIONS, + max_connections=_CLIENT_CONFIG.max_connections, + max_keepalive_connections=_CLIENT_CONFIG.max_keepalive_connections, ) - - timeout = httpx.Timeout(DEFAULT_TIMEOUT) - + + timeout = httpx.Timeout(_CLIENT_CONFIG.timeout_seconds) + headers = { - "User-Agent": DEFAULT_USER_AGENT, + "User-Agent": _CLIENT_CONFIG.user_agent, } - + return httpx.AsyncClient( limits=limits, timeout=timeout, From 108ee4d7353336cd8f5038628c64f3847429683a Mon Sep 17 00:00:00 2001 From: vadim Date: Mon, 1 Dec 2025 15:07:17 +0300 Subject: [PATCH 24/27] fix: enhance error handling for modality in embedding routes --- src/embedding_service.py | 66 ++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/src/embedding_service.py b/src/embedding_service.py index 1946a42..d812d4f 100644 --- a/src/embedding_service.py +++ b/src/embedding_service.py @@ -129,42 +129,54 @@ async def route_openai_get_embeddings( ) # Route to appropriate embedding method based on modality - if modality == "text": - logger.debug(f"Calling .embed() with {len(validated_items)} text items") - embeddings, usage = await self.engine_array[model_name].embed( - validated_items - ) - logger.debug(f"Successfully got {len(embeddings)} text embeddings") + try: + if modality == "text": + logger.debug( + f"Calling .embed() with {len(validated_items)} text items" + ) + embeddings, usage = await self.engine_array[model_name].embed( + validated_items + ) + logger.debug( + f"Successfully got {len(embeddings)} text embeddings" + ) - elif modality == "image": - logger.debug( - f"Calling .image_embed() with {len(validated_items)} image items" - ) - try: + elif modality == "image": + logger.debug( + f"Calling .image_embed() with {len(validated_items)} image items" + ) embeddings, usage = await self.engine_array[model_name].image_embed( images=validated_items ) - logger.debug(f"Successfully got {len(embeddings)} image embeddings") - except ModelNotDeployedError as e: + logger.debug( + f"Successfully got {len(embeddings)} image embeddings" + ) + + elif modality == "audio": + raise NotImplementedError( + "Audio modality is not yet implemented. " + "Currently supported modalities: 'text', 'image'" + ) + + else: + raise ValueError( + f"Invalid modality: '{modality}'. " + f"Supported modalities: 'text', 'image', 'audio' (not yet implemented)" + ) + except ModelNotDeployedError as e: + if modality == "image": error_msg = ( f"Model '{model_name}' does not support image embeddings. " f"Please use a multimodal model (e.g., 'jinaai/jina-clip-v1') " f"or use modality='text' instead." ) - logger.error(f"{error_msg} Original error: {e}") - raise ValueError(error_msg) from e - - elif modality == "audio": - raise NotImplementedError( - "Audio modality is not yet implemented. " - "Currently supported modalities: 'text', 'image'" - ) - - else: - raise ValueError( - f"Invalid modality: '{modality}'. " - f"Supported modalities: 'text', 'image', 'audio' (not yet implemented)" - ) + else: + error_msg = ( + f"Model '{model_name}' is not deployed or does not support " + f"{modality} embeddings." 
+ ) + logger.error(f"{error_msg} Original error: {e}") + raise ValueError(error_msg) from e if return_as_list: return [ From 4c7838bcf89a6030e87087e5c0cba31443bfc464 Mon Sep 17 00:00:00 2001 From: vadim Date: Tue, 2 Dec 2025 07:38:04 +0300 Subject: [PATCH 25/27] fix: update default User-Agent string for HTTP client in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f5b3bfb..d8c55fe 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ All behaviour is controlled through environment variables: | `DTYPES` | No | `auto` | Precision per model (`auto`, `fp16`, `fp8`). Semicolon-separated, must match `MODEL_NAMES`.
Example: `auto;auto` | | `INFINITY_QUEUE_SIZE` | No | `48000` | Max items queueable inside the Infinity engine. | | `RUNPOD_MAX_CONCURRENCY` | No | `300` | Max concurrent requests the RunPod wrapper will accept. | -| `HTTP_CLIENT_USER_AGENT` | No | Desktop Chrome UA | Override the browser-style User-Agent used for outbound image downloads. | +| `HTTP_CLIENT_USER_AGENT` | No | `Mozilla/5.0 ... Chrome/120.0.0.0 Safari/537.36` | Override the browser-style User-Agent used for outbound image downloads. | | `HTTP_CLIENT_TIMEOUT` | No | `10.0` | Request timeout (seconds) for outbound image fetches. | | `HTTP_CLIENT_MAX_CONNECTIONS` | No | `50` | Concurrent connection pool size for the shared `httpx` client. | | `HTTP_CLIENT_MAX_KEEPALIVE_CONNECTIONS` | No | `20` | Max keep-alive sockets retained by the shared `httpx` client. | From b5cd5af9ce61a9101d0407d5ea5d289b546008b4 Mon Sep 17 00:00:00 2001 From: vadim Date: Tue, 2 Dec 2025 07:38:42 +0300 Subject: [PATCH 26/27] fix: improve error messages for model availability and modality capabilities --- src/embedding_service.py | 36 +++++++++++++++++++----------------- tests/test_modality.py | 7 ++++++- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/embedding_service.py b/src/embedding_service.py index d812d4f..0dc4e30 100644 --- a/src/embedding_service.py +++ b/src/embedding_service.py @@ -98,14 +98,16 @@ async def route_openai_get_embeddings( available_models = self.list_models() if model_name not in available_models: - logger.error( - f"Requested model '{model_name}' not found. " - f"Available models: {available_models}" + available_models_msg = ( + ", ".join(available_models) if available_models else "none" ) - raise ValueError( - f"Model '{model_name}' is not available. " - f"Available models: {', '.join(available_models)}" + error_msg = ( + f"Model '{model_name}' is not deployed. " + f"Available deployments: {available_models_msg}. " + f"Deploy the requested model or choose one of the available models." ) + logger.error(error_msg) + raise ValueError(error_msg) if not isinstance(embedding_input, list): embedding_input = [embedding_input] @@ -164,17 +166,17 @@ async def route_openai_get_embeddings( f"Supported modalities: 'text', 'image', 'audio' (not yet implemented)" ) except ModelNotDeployedError as e: - if modality == "image": - error_msg = ( - f"Model '{model_name}' does not support image embeddings. " - f"Please use a multimodal model (e.g., 'jinaai/jina-clip-v1') " - f"or use modality='text' instead." - ) - else: - error_msg = ( - f"Model '{model_name}' is not deployed or does not support " - f"{modality} embeddings." - ) + available_capabilities = sorted( + getattr(self.engine_array[model_name], "capabilities", set()) + ) + capabilities_hint = ( + ", ".join(available_capabilities) if available_capabilities else "none" + ) + error_msg = ( + f"Model '{model_name}' does not expose the '{modality}' capability. " + f"Detected capabilities: {capabilities_hint}. Deploy a model that supports " + f"this modality or adjust the request." 
+ ) logger.error(f"{error_msg} Original error: {e}") raise ValueError(error_msg) from e diff --git a/tests/test_modality.py b/tests/test_modality.py index a776386..870188d 100644 --- a/tests/test_modality.py +++ b/tests/test_modality.py @@ -158,6 +158,9 @@ def test_wrong_modality_error() -> None: result = response.json() error_msg = _extract_error_message(result) assert error_msg is not None, f"Expected error object, got: {result}" + lowered = error_msg.lower() + assert "does not expose the 'image' capability" in lowered + assert "detected capabilities" in lowered def test_audio_not_implemented() -> None: @@ -362,7 +365,9 @@ def test_edge_nonexistent_model() -> None: result = response.json() error_msg = _extract_error_message(result) assert error_msg is not None, f"Expected model missing error: {result}" - assert "not available" in error_msg.lower() or "not found" in error_msg.lower() + lowered = error_msg.lower() + assert "is not deployed" in lowered + assert "available deployments" in lowered def test_edge_invalid_image_url() -> None: From ceee11308c2a63da48a94f4923a368484dd56f20 Mon Sep 17 00:00:00 2001 From: vadim Date: Thu, 11 Dec 2025 07:13:11 +0300 Subject: [PATCH 27/27] fix: reorder imports for better organization --- src/utils.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/utils.py b/src/utils.py index 1a59304..4d8b42e 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,8 +1,7 @@ -from http import HTTPStatus -from typing import Annotated, Any, Dict, List, Literal, Optional, Union -from typing import Any, Dict, Iterable, List, Optional, Union -from uuid import uuid4 import time +from http import HTTPStatus +from typing import Annotated, Any, Dict, Iterable, List, Literal, Optional, Union + import numpy as np import numpy.typing as npt from pydantic import BaseModel, Field, conlist
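
A quick sketch of how the `HttpClientConfig` from PATCH 23 picks up the `HTTP_CLIENT_*` variables documented in PATCH 21 (the override values below are arbitrary examples; `src/` is assumed to be importable, matching the `from config import HttpClientConfig` import in `src/http_client.py`):

```python
import os

# Illustrative overrides; the defaults in src/config.py are 10.0 s, 50, and 20.
os.environ["HTTP_CLIENT_TIMEOUT"] = "30"
os.environ["HTTP_CLIENT_MAX_CONNECTIONS"] = "100"

from config import HttpClientConfig  # assumed on PYTHONPATH, as in src/http_client.py

cfg = HttpClientConfig()
# Each value is a cached_property read from the environment on first access,
# so the overrides above must be set before these attributes are touched.
print(cfg.timeout_seconds)            # 30.0
print(cfg.max_connections)            # 100
print(cfg.max_keepalive_connections)  # 20 (default, not overridden)
```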