The official Python library for the Fish Audio API
Documentation: Python SDK Guide | API Reference
Note: If you're using the legacy `fish_audio_sdk` API, see the migration guide to upgrade.
pip install fish-audio-sdk
# With audio playback utilities
pip install fish-audio-sdk[utils]
Get your API key from fish.audio/app/api-keys:
export FISH_API_KEY=your_api_key_here
Or provide directly:
from fishaudio import FishAudio
client = FishAudio(api_key="your_api_key")
Synchronous:
from fishaudio import FishAudio
from fishaudio.utils import play, save
client = FishAudio()
# Generate audio
audio = client.tts.convert(text="Hello, world!")
# Play or save
play(audio)
save(audio, "output.mp3")
Asynchronous:
import asyncio
from fishaudio import AsyncFishAudio
from fishaudio.utils import play, save
async def main():
client = AsyncFishAudio()
audio = await client.tts.convert(text="Hello, world!")
play(audio)
save(audio, "output.mp3")
asyncio.run(main())
With custom voice:
# Use a specific voice by ID
audio = client.tts.convert(
text="Custom voice",
reference_id="802e3bc2b27e49c2995d23ef70e6ac89"
)
With speed control:
audio = client.tts.convert(
text="Speaking faster!",
speed=1.5 # 1.5x speed
)
Reusable configuration:
from fishaudio.types import TTSConfig, Prosody
config = TTSConfig(
prosody=Prosody(speed=1.2, volume=-5),
reference_id="933563129e564b19a115bedd57b7406a",
format="wav",
latency="balanced"
)
# Reuse across generations
audio1 = client.tts.convert(text="First message", config=config)
audio2 = client.tts.convert(text="Second message", config=config)
Chunk-by-chunk processing:
# Stream and process chunks as they arrive
for chunk in client.tts.stream(text="Long content..."):
send_to_websocket(chunk)
# Or collect all chunks
audio = client.tts.stream(text="Hello!").collect()
# Transcribe audio
with open("audio.wav", "rb") as f:
result = client.asr.transcribe(audio=f.read(), language="en")
print(result.text)
# Access timestamped segments
for segment in result.segments:
print(f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}")
Stream dynamically generated text for conversational AI and live applications:
Synchronous:
def text_chunks():
yield "Hello, "
yield "this is "
yield "streaming!"
audio_stream = client.tts.stream_websocket(text_chunks(), latency="balanced")
play(audio_stream)
Asynchronous:
async def text_chunks():
yield "Hello, "
yield "this is "
yield "streaming!"
audio_stream = await client.tts.stream_websocket(text_chunks(), latency="balanced")
play(audio_stream)
Instant cloning:
from fishaudio.types import ReferenceAudio
# Clone voice on-the-fly
with open("reference.wav", "rb") as f:
audio = client.tts.convert(
text="Cloned voice speaking",
references=[ReferenceAudio(
audio=f.read(),
text="Text spoken in reference"
)]
)
Persistent voice models:
# Create voice model for reuse
with open("voice_sample.wav", "rb") as f:
voice = client.voices.create(
title="My Voice",
voices=[f.read()],
description="Custom voice clone"
)
# Use the created model
audio = client.tts.convert(
text="Using my saved voice",
reference_id=voice.id
)

| Resource | Description | Key Methods |
|---|---|---|
| client.tts | Text-to-speech | convert(), stream(), stream_websocket() |
| client.asr | Speech recognition | transcribe() |
| client.voices | Voice management | list(), get(), create(), update(), delete() |
| client.account | Account info | get_credits(), get_package() |
from fishaudio.exceptions import (
AuthenticationError,
RateLimitError,
ValidationError,
FishAudioError
)
try:
audio = client.tts.convert(text="Hello!")
except AuthenticationError:
print("Invalid API key")
except RateLimitError:
print("Rate limit exceeded")
except ValidationError as e:
print(f"Invalid request: {e}")
except FishAudioError as e:
print(f"API error: {e}")
- Documentation: SDK Guide | API Reference
- Package: PyPI | GitHub
- Legacy SDK: Documentation | Migration Guide
This project is licensed under the Apache-2.0 License - see the LICENSE file for details.