Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 46 additions & 22 deletions sdk/python/src/openai/audio_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@

import json
import logging
import queue
import threading
from dataclasses import dataclass
from typing import Callable, Optional
from typing import Generator, List, Optional

from ..detail.core_interop import CoreInterop, InteropRequest
from ..exception import FoundryLocalException
Expand Down Expand Up @@ -114,40 +116,62 @@ def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
data = json.loads(response.data)
return AudioTranscriptionResponse(text=data.get("text", ""))

def _stream_chunks(self, request_json: str) -> Generator[AudioTranscriptionResponse, None, None]:
    """Yield parsed transcription chunks from the native streaming call.

    A daemon thread drives the blocking native command and forwards each
    decoded chunk through a queue; this generator drains the queue until a
    sentinel marking end-of-stream arrives, then re-raises the first error
    recorded by the worker (if any).

    Args:
        request_json: JSON-encoded OpenAI-style transcription request.

    Yields:
        AudioTranscriptionResponse: One incremental transcription chunk.

    Raises:
        FoundryLocalException: If the native command reports an error.
        Exception: Any exception raised inside the worker thread or while
            decoding a chunk (surfaced after the stream ends).
    """
    _SENTINEL = object()
    chunk_queue: queue.Queue = queue.Queue()
    errors: List[Exception] = []

    def _on_chunk(chunk_str: str) -> None:
        # This runs as a callback invoked from native code: an exception
        # escaping here would propagate across the FFI boundary, where
        # behavior is undefined and the error is silently lost. Record it
        # instead and surface it to the consumer after the stream ends.
        try:
            chunk_data = json.loads(chunk_str)
            chunk_queue.put(AudioTranscriptionResponse(text=chunk_data.get("text", "")))
        except Exception as exc:  # must not propagate into native code
            errors.append(exc)

    def _run() -> None:
        try:
            resp = self._core_interop.execute_command_with_callback(
                "audio_transcribe",
                InteropRequest(params={"OpenAICreateRequest": request_json}),
                _on_chunk,
            )
            if resp.error is not None:
                errors.append(
                    FoundryLocalException(
                        f"Streaming audio transcription failed for model '{self.model_id}': {resp.error}"
                    )
                )
        except Exception as exc:
            errors.append(exc)
        finally:
            # Always unblock the consumer, even on failure.
            chunk_queue.put(_SENTINEL)

    threading.Thread(target=_run, daemon=True).start()
    while (item := chunk_queue.get()) is not _SENTINEL:
        yield item
    if errors:
        raise errors[0]

def transcribe_streaming(
    self,
    audio_file_path: str,
) -> Generator[AudioTranscriptionResponse, None, None]:
    """Transcribe an audio file with streaming chunks.

    Consume with a standard ``for`` loop::

        for chunk in audio_client.transcribe_streaming("recording.mp3"):
            print(chunk.text, end="", flush=True)

    Args:
        audio_file_path: Path to the audio file to transcribe.

    Returns:
        A generator of ``AudioTranscriptionResponse`` objects.

    Raises:
        ValueError: If *audio_file_path* is not a non-empty string.
        FoundryLocalException: If the underlying native transcription command fails.
    """
    # Validate eagerly so bad input raises at call time, not on the first
    # iteration of the returned (lazy) generator.
    self._validate_audio_file_path(audio_file_path)

    request_json = self._create_request_json(audio_file_path)
    return self._stream_chunks(request_json)
4 changes: 2 additions & 2 deletions sdk/python/test/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ test/
│ └── test_model_load_manager.py # ModelLoadManager core interop & web service (5 tests)
└── openai/
├── test_chat_client.py # Chat completions, streaming, error validation (7 tests)
└── test_audio_client.py # Audio transcription (7 tests)
└── test_audio_client.py # Audio transcription (6 tests)
```

**Total: 32 tests**
**Total: 31 tests**

## Key conventions

Expand Down
22 changes: 3 additions & 19 deletions sdk/python/test/openai/test_audio_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,13 @@ def test_should_transcribe_audio_streaming(self, catalog):
audio_client.settings.temperature = 0.0

chunks = []

def on_chunk(chunk):
for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH):
assert chunk is not None
assert hasattr(chunk, "text")
assert isinstance(chunk.text, str)
assert len(chunk.text) > 0
chunks.append(chunk.text)

audio_client.transcribe_streaming(AUDIO_FILE_PATH, on_chunk)

full_text = "".join(chunks)
assert full_text == EXPECTED_TEXT
finally:
Expand All @@ -114,14 +111,11 @@ def test_should_transcribe_audio_streaming_with_temperature(self, catalog):
audio_client.settings.temperature = 0.0

chunks = []

def on_chunk(chunk):
for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH):
assert chunk is not None
assert isinstance(chunk.text, str)
chunks.append(chunk.text)

audio_client.transcribe_streaming(AUDIO_FILE_PATH, on_chunk)

full_text = "".join(chunks)
assert full_text == EXPECTED_TEXT
finally:
Expand All @@ -143,14 +137,4 @@ def test_should_raise_for_streaming_empty_audio_file_path(self, catalog):
audio_client = model.get_audio_client()

with pytest.raises(ValueError, match="Audio file path must be a non-empty string"):
audio_client.transcribe_streaming("", lambda chunk: None)

def test_should_raise_for_streaming_invalid_callback(self, catalog):
"""transcribe_streaming with invalid callback should raise."""
model = catalog.get_model(AUDIO_MODEL_ALIAS)
assert model is not None
audio_client = model.get_audio_client()

for invalid_callback in [None, 42, {}, "not a function"]:
with pytest.raises(TypeError, match="Callback must be a valid function"):
audio_client.transcribe_streaming(AUDIO_FILE_PATH, invalid_callback)
audio_client.transcribe_streaming("")
Loading