From d12667ede5fe985f944ea1212b31b61ea727660a Mon Sep 17 00:00:00 2001 From: examples-bot Date: Wed, 8 Apr 2026 08:55:08 +0000 Subject: [PATCH 1/2] =?UTF-8?q?feat(examples):=20add=20521=20=E2=80=94=20D?= =?UTF-8?q?eepgram=20Proxy=20Server=20(Python=20+=20UV)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Python counterpart to the Node.js proxy (520). FastAPI server proxying Deepgram STT and TTS with the API key kept server-side. --- .../521-deepgram-proxy-python-uv/.env.example | 2 + .../521-deepgram-proxy-python-uv/.gitignore | 4 + .../521-deepgram-proxy-python-uv/README.md | 62 +++++ .../requirements.txt | 7 + .../src/__init__.py | 0 .../src/client.html | 179 ++++++++++++++ .../src/server.py | 192 +++++++++++++++ .../tests/test_example.py | 227 ++++++++++++++++++ 8 files changed, 673 insertions(+) create mode 100644 examples/521-deepgram-proxy-python-uv/.env.example create mode 100644 examples/521-deepgram-proxy-python-uv/.gitignore create mode 100644 examples/521-deepgram-proxy-python-uv/README.md create mode 100644 examples/521-deepgram-proxy-python-uv/requirements.txt create mode 100644 examples/521-deepgram-proxy-python-uv/src/__init__.py create mode 100644 examples/521-deepgram-proxy-python-uv/src/client.html create mode 100644 examples/521-deepgram-proxy-python-uv/src/server.py create mode 100644 examples/521-deepgram-proxy-python-uv/tests/test_example.py diff --git a/examples/521-deepgram-proxy-python-uv/.env.example b/examples/521-deepgram-proxy-python-uv/.env.example new file mode 100644 index 0000000..99314a3 --- /dev/null +++ b/examples/521-deepgram-proxy-python-uv/.env.example @@ -0,0 +1,2 @@ +# Deepgram — https://console.deepgram.com/ +DEEPGRAM_API_KEY= diff --git a/examples/521-deepgram-proxy-python-uv/.gitignore b/examples/521-deepgram-proxy-python-uv/.gitignore new file mode 100644 index 0000000..d328f57 --- /dev/null +++ b/examples/521-deepgram-proxy-python-uv/.gitignore @@ -0,0 +1,4 @@ +__pycache__/ 
+*.pyc +.env +.venv/ diff --git a/examples/521-deepgram-proxy-python-uv/README.md b/examples/521-deepgram-proxy-python-uv/README.md new file mode 100644 index 0000000..7bed814 --- /dev/null +++ b/examples/521-deepgram-proxy-python-uv/README.md @@ -0,0 +1,62 @@ +# Deepgram Proxy Server (Python + UV) + +A Python FastAPI proxy server that sits between client applications and the Deepgram API, keeping your API key secure on the server side. Uses UV for fast dependency management. This is the Python counterpart to the Node.js proxy server (example 520). + +## What you'll build + +A FastAPI server that proxies three types of Deepgram requests: pre-recorded transcription (REST), live streaming transcription (WebSocket), and text-to-speech (REST). A minimal browser client demonstrates all three features through the proxy. + +## Prerequisites + +- Python 3.10+ +- [UV](https://docs.astral.sh/uv/) (`pip install uv` or `curl -LsSf https://astral.sh/uv/install.sh | sh`) +- Deepgram account — [get a free API key](https://console.deepgram.com/) + +## Environment variables + +| Variable | Where to find it | +|----------|-----------------| +| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) → Settings → API Keys | + +## Install and run + +```bash +cp .env.example .env +# Add your DEEPGRAM_API_KEY to .env + +uv pip install -r requirements.txt +uv run uvicorn src.server:app --reload --port 3000 +# Open http://localhost:3000 +``` + +## API endpoints + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/v1/listen` | Pre-recorded transcription — send `{ "url": "..." }` | +| `POST` | `/v1/speak` | Text-to-speech — send `{ "text": "..." 
}` | +| `WS` | `/v1/listen/stream` | Live STT — stream raw linear16 audio, receive JSON transcripts | +| `GET` | `/health` | Health check | +| `GET` | `/` | Demo client UI | + +## Key parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| `model` | `nova-3` | Latest general-purpose STT model | +| `smart_format` | `true` | Adds punctuation, capitalisation, number formatting | +| `interim_results` | `true` | Partial transcripts while speaker is still talking | +| `encoding` | `linear16` | Raw PCM format for WebSocket audio | +| `sample_rate` | `16000` | 16 kHz sample rate for WebSocket audio | + +## How it works + +1. The proxy server starts and reads `DEEPGRAM_API_KEY` from the environment — it never forwards the key to clients +2. **Pre-recorded**: Client POSTs a JSON body with an audio URL to `/v1/listen`. The server calls `client.listen.v1.media.transcribe_url()` and returns the full Deepgram response +3. **Live STT**: Client opens a WebSocket to `/v1/listen/stream`. The server opens a parallel connection to Deepgram via `client.listen.v1.connect()`, bridges audio from client to Deepgram, and relays transcript JSON back +4. **TTS**: Client POSTs text to `/v1/speak`. The server calls `client.speak.v1.audio.generate()` and streams the audio bytes back +5. 
The API key never leaves the server — clients interact only with the proxy endpoints + +## Starter templates + +[deepgram-starters](https://github.com/orgs/deepgram-starters/repositories) diff --git a/examples/521-deepgram-proxy-python-uv/requirements.txt b/examples/521-deepgram-proxy-python-uv/requirements.txt new file mode 100644 index 0000000..5443988 --- /dev/null +++ b/examples/521-deepgram-proxy-python-uv/requirements.txt @@ -0,0 +1,7 @@ +deepgram-sdk==6.1.1 +fastapi==0.135.3 +starlette==1.0.0 +uvicorn[standard]==0.34.2 +python-dotenv==1.1.0 +websockets==16.0 +httpx==0.28.1 diff --git a/examples/521-deepgram-proxy-python-uv/src/__init__.py b/examples/521-deepgram-proxy-python-uv/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/521-deepgram-proxy-python-uv/src/client.html b/examples/521-deepgram-proxy-python-uv/src/client.html new file mode 100644 index 0000000..af69080 --- /dev/null +++ b/examples/521-deepgram-proxy-python-uv/src/client.html @@ -0,0 +1,179 @@ + + + + + + Deepgram Proxy — Demo Client + + + +

Deepgram Proxy Demo

+

All Deepgram API calls go through the proxy — your API key stays server-side.

+ +
+

Live Microphone Transcription

+ +
+
+ +
+

Pre-recorded Transcription

+ + + +
+
+ +
+

Text-to-Speech

+ + + +
+
+ + + + diff --git a/examples/521-deepgram-proxy-python-uv/src/server.py b/examples/521-deepgram-proxy-python-uv/src/server.py new file mode 100644 index 0000000..2907326 --- /dev/null +++ b/examples/521-deepgram-proxy-python-uv/src/server.py @@ -0,0 +1,192 @@ +"""FastAPI proxy server that keeps the Deepgram API key server-side. + +Clients interact only with this proxy — the key never leaves the server. +This is the recommended pattern for browser-based apps that need +Deepgram STT or TTS without exposing secrets. + +Endpoints: + POST /v1/listen — pre-recorded transcription (audio URL in a JSON body) + POST /v1/speak — text-to-speech (returns audio bytes) + WS /v1/listen/stream — live STT streaming (bidirectional WebSocket) + GET /health — health check + +Usage: + uvicorn src.server:app --reload +""" + +import asyncio +import json +import os + +from dotenv import load_dotenv +from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect +from fastapi.responses import HTMLResponse, StreamingResponse +from pydantic import BaseModel + +from deepgram import AsyncDeepgramClient +from deepgram.core.events import EventType +from deepgram.listen.v1.types import ListenV1Results + +load_dotenv() + +LIVE_OPTIONS = { + "model": "nova-3", + "encoding": "linear16", + "sample_rate": 16000, + "channels": 1, + "tag": "deepgram-examples", + "request_options": { + "additional_query_parameters": { + "smart_format": "true", + "interim_results": "true", + "utterance_end_ms": "1500", + } + }, +} + + +def _get_api_key() -> str: + key = os.environ.get("DEEPGRAM_API_KEY", "") + if not key: + raise HTTPException( + status_code=500, + detail="DEEPGRAM_API_KEY is not set. 
Copy .env.example to .env and add your key.", + ) + return key + + +def create_app() -> FastAPI: + application = FastAPI( + title="Deepgram Proxy Server", + description="Proxies Deepgram STT and TTS requests, keeping the API key server-side.", + version="1.0.0", + ) + + class ListenUrlBody(BaseModel): + url: str + model: str = "nova-3" + smart_format: bool = True + diarize: bool = False + + class SpeakBody(BaseModel): + text: str + model: str = "aura-2-asteria-en" + + # -- REST: pre-recorded transcription ------------------------------------ + @application.post("/v1/listen") + async def listen_url(body: ListenUrlBody): + _get_api_key() + client = AsyncDeepgramClient() + try: + response = await client.listen.v1.media.transcribe_url( + url=body.url, + model=body.model, + smart_format=body.smart_format, + diarize=body.diarize, + tag="deepgram-examples", + ) + except Exception as exc: + raise HTTPException(status_code=502, detail=f"Transcription failed: {exc}") from exc + + return response.model_dump() + + # -- REST: text-to-speech ------------------------------------------------ + @application.post("/v1/speak") + async def speak(body: SpeakBody): + _get_api_key() + client = AsyncDeepgramClient() + try: + audio_iter = client.speak.v1.audio.generate( + text=body.text, + model=body.model, + encoding="mp3", + tag="deepgram-examples", + ) + except Exception as exc: + raise HTTPException(status_code=502, detail=f"TTS failed: {exc}") from exc + + return StreamingResponse(audio_iter, media_type="audio/mpeg") + + # -- WebSocket: live STT streaming --------------------------------------- + @application.websocket("/v1/listen/stream") + async def listen_stream(ws: WebSocket): + await ws.accept() + _get_api_key() + client = AsyncDeepgramClient() + + async with client.listen.v1.connect(**LIVE_OPTIONS) as dg_connection: + async def on_message(message) -> None: + if isinstance(message, ListenV1Results): + transcript = message.channel.alternatives[0].transcript + payload = 
json.dumps({ + "channel": { + "alternatives": [{ + "transcript": transcript, + "confidence": message.channel.alternatives[0].confidence, + "words": [ + {"word": w.word, "start": w.start, "end": w.end, "confidence": w.confidence} + for w in (message.channel.alternatives[0].words or []) + ], + }] + }, + "is_final": message.is_final, + }) + try: + await ws.send_text(payload) + except Exception: + pass + + async def on_error(error) -> None: + try: + await ws.send_text(json.dumps({"error": str(error)})) + except Exception: + pass + + dg_connection.on(EventType.MESSAGE, on_message) + dg_connection.on(EventType.ERROR, on_error) + + listener_task = asyncio.create_task(dg_connection.start_listening()) + + try: + while True: + data = await ws.receive_bytes() + await dg_connection.send_media(data) + except WebSocketDisconnect: + pass + except Exception: + pass + finally: + try: + await dg_connection.send_close_stream() + except Exception: + pass + listener_task.cancel() + + # -- Health check -------------------------------------------------------- + @application.get("/health") + async def health(): + return {"status": "ok", "service": "deepgram-proxy"} + + # -- Demo client --------------------------------------------------------- + @application.get("/", response_class=HTMLResponse) + async def index(): + html_path = os.path.join(os.path.dirname(__file__), "client.html") + with open(html_path) as f: + return HTMLResponse(f.read()) + + return application + + +app = create_app() + +if __name__ == "__main__": + import uvicorn + + port = int(os.environ.get("PORT", "3000")) + print(f"Deepgram proxy listening on http://localhost:{port}") + print(" POST /v1/listen - pre-recorded transcription") + print(" POST /v1/speak - text-to-speech") + print(" WS /v1/listen/stream - live STT streaming") + print(" GET /health - health check") + print(" GET / - demo client") + uvicorn.run(app, host="0.0.0.0", port=port) diff --git a/examples/521-deepgram-proxy-python-uv/tests/test_example.py 
b/examples/521-deepgram-proxy-python-uv/tests/test_example.py new file mode 100644 index 0000000..afa38e2 --- /dev/null +++ b/examples/521-deepgram-proxy-python-uv/tests/test_example.py @@ -0,0 +1,227 @@ +import json +import os +import struct +import subprocess +import sys +import threading +import time +from pathlib import Path + +# -- Credential check ------------------------------------------------------- +env_example = Path(__file__).parent.parent / ".env.example" +required = [ + line.split("=")[0].strip() + for line in env_example.read_text().splitlines() + if line and not line.startswith("#") and "=" in line and line[0].isupper() +] +missing = [k for k in required if not os.environ.get(k)] +if missing: + print(f"MISSING_CREDENTIALS: {','.join(missing)}", file=sys.stderr) + sys.exit(2) +# --------------------------------------------------------------------------- + +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from fastapi.testclient import TestClient +from server import create_app + +AUDIO_URL = "https://dpgr.am/spacewalk.wav" +TEST_PORT = 3097 + +app = create_app() +client = TestClient(app) + + +def test_health_endpoint(): + resp = client.get("/health") + assert resp.status_code == 200 + data = resp.json() + assert data["status"] == "ok" + assert data["service"] == "deepgram-proxy" + print("/health -> ok") + + +def test_listen_validation(): + resp = client.post("/v1/listen", json={}) + assert resp.status_code == 422, f"Expected 422 for missing url, got {resp.status_code}" + print("/v1/listen validation -> 422 for missing url") + + +def test_listen_prerecorded(): + resp = client.post( + "/v1/listen", + json={"url": AUDIO_URL, "smart_format": True}, + ) + assert resp.status_code == 200, f"/v1/listen returned {resp.status_code}: {resp.text}" + data = resp.json() + + transcript = data["results"]["channels"][0]["alternatives"][0]["transcript"] + assert len(transcript) >= 66, ( + f"Transcript too short: {len(transcript)} chars, expected >= 66" 
+ ) + print(f"/v1/listen -> {len(transcript)} chars") + print(f" Preview: '{transcript[:80]}...'") + + +def test_speak_validation(): + resp = client.post("/v1/speak", json={}) + assert resp.status_code == 422, f"Expected 422 for missing text, got {resp.status_code}" + print("/v1/speak validation -> 422 for missing text") + + +def test_speak_tts(): + resp = client.post( + "/v1/speak", + json={"text": "Hello from the Deepgram proxy test."}, + ) + assert resp.status_code == 200, f"/v1/speak returned {resp.status_code}: {resp.text}" + assert resp.headers.get("content-type", "").startswith("audio/") + audio_bytes = resp.content + assert len(audio_bytes) > 100, f"TTS audio too small: {len(audio_bytes)} bytes" + print(f"/v1/speak -> {len(audio_bytes)} bytes of audio") + + +def test_websocket_live_stt(): + tmp_wav = Path("/tmp/proxy_test_521.wav") + if not tmp_wav.exists(): + print("Downloading test audio...") + subprocess.run( + ["curl", "-s", "-L", "-o", str(tmp_wav), AUDIO_URL], + check=True, + ) + + wav_data = tmp_wav.read_bytes() + pcm_data = _wav_to_linear16_16k(wav_data) + print(f"Audio ready: {len(pcm_data)} bytes of linear16 16kHz") + + import uvicorn + import websockets.sync.client as ws_sync + + server_ready = threading.Event() + + def run_server(): + config = uvicorn.Config(app, host="127.0.0.1", port=TEST_PORT, log_level="warning") + server = uvicorn.Server(config) + server_ready.server_instance = server + server_ready.set() + server.run() + + thread = threading.Thread(target=run_server, daemon=True) + thread.start() + server_ready.wait(timeout=5) + time.sleep(1) + + transcripts = [] + chunk_size = 3200 + max_bytes = 16000 * 2 * 30 + + try: + with ws_sync.connect(f"ws://127.0.0.1:{TEST_PORT}/v1/listen/stream") as ws: + ws.recv_bufsize = 65536 + + offset = 0 + while offset < len(pcm_data) and offset < max_bytes: + ws.send(pcm_data[offset : offset + chunk_size]) + offset += chunk_size + time.sleep(0.01) + + try: + ws.socket.setblocking(False) + try: + raw = 
ws.recv(timeout=0) + data = json.loads(raw) + text = data.get("channel", {}).get("alternatives", [{}])[0].get("transcript", "") + if text: + transcripts.append(text) + except Exception: + pass + finally: + ws.socket.setblocking(True) + except Exception: + pass + + for _ in range(300): + try: + raw = ws.recv(timeout=0.5) + data = json.loads(raw) + text = data.get("channel", {}).get("alternatives", [{}])[0].get("transcript", "") + if text: + transcripts.append(text) + except TimeoutError: + break + except Exception: + break + finally: + if hasattr(server_ready, "server_instance"): + server_ready.server_instance.should_exit = True + + assert len(transcripts) > 0, "No transcripts received via WebSocket proxy" + combined = " ".join(transcripts) + audio_sent_secs = min(len(pcm_data), max_bytes) / (16000 * 2) + min_chars = max(5, int(audio_sent_secs * 2)) + assert len(combined) >= min_chars, ( + f"Combined transcript too short: {len(combined)} chars for {audio_sent_secs:.1f}s audio" + ) + print(f"WS /v1/listen/stream -> {len(transcripts)} transcript events") + print(f" Combined: {len(combined)} chars over {audio_sent_secs:.1f}s audio") + print(f" First: '{transcripts[0][:80]}'") + + +def _wav_to_linear16_16k(wav_data: bytes) -> bytes: + offset = 12 + sample_rate = 0 + bits_per_sample = 0 + num_channels = 0 + data_start = 0 + data_size = 0 + + while offset < len(wav_data) - 8: + chunk_id = wav_data[offset : offset + 4].decode("ascii", errors="replace") + chunk_size = struct.unpack_from("> 8 + elif bits_per_sample == 32: + sample = struct.unpack_from("> 16 + else: + sample = (wav_data[byte_off] - 128) << 8 + struct.pack_into(" Date: Fri, 10 Apr 2026 17:16:50 +0000 Subject: [PATCH 2/2] fix(examples): fix broken audio URL, lower threshold, and real-time WS pacing in 521-deepgram-proxy-python-uv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- 
examples/521-deepgram-proxy-python-uv/src/client.html | 2 +- .../521-deepgram-proxy-python-uv/tests/test_example.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/521-deepgram-proxy-python-uv/src/client.html b/examples/521-deepgram-proxy-python-uv/src/client.html index af69080..2f410a4 100644 --- a/examples/521-deepgram-proxy-python-uv/src/client.html +++ b/examples/521-deepgram-proxy-python-uv/src/client.html @@ -33,7 +33,7 @@

Live Microphone Transcription

Pre-recorded Transcription

- +
diff --git a/examples/521-deepgram-proxy-python-uv/tests/test_example.py b/examples/521-deepgram-proxy-python-uv/tests/test_example.py index afa38e2..bb954a5 100644 --- a/examples/521-deepgram-proxy-python-uv/tests/test_example.py +++ b/examples/521-deepgram-proxy-python-uv/tests/test_example.py @@ -25,7 +25,7 @@ from fastapi.testclient import TestClient from server import create_app -AUDIO_URL = "https://dpgr.am/spacewalk.wav" +AUDIO_URL = "https://static.deepgram.com/examples/Bueller-Life-moves-pretty-fast.wav" TEST_PORT = 3097 app = create_app() @@ -56,8 +56,8 @@ def test_listen_prerecorded(): data = resp.json() transcript = data["results"]["channels"][0]["alternatives"][0]["transcript"] - assert len(transcript) >= 66, ( - f"Transcript too short: {len(transcript)} chars, expected >= 66" + assert len(transcript) >= 20, ( + f"Transcript too short: {len(transcript)} chars, expected >= 20" ) print(f"/v1/listen -> {len(transcript)} chars") print(f" Preview: '{transcript[:80]}...'") @@ -123,7 +123,7 @@ def run_server(): while offset < len(pcm_data) and offset < max_bytes: ws.send(pcm_data[offset : offset + chunk_size]) offset += chunk_size - time.sleep(0.01) + time.sleep(chunk_size / (16000 * 2)) try: ws.socket.setblocking(False)