Skip to main content

Documentation Index

Fetch the complete documentation index at: https://assemblyai.com/docs/llms.txt

Use this file to discover all available pages before exploring further.

The speech_model connection parameter lets you specify which model to use for streaming transcription.
speech_model is required: You must include the speech_model parameter in every streaming transcription request. There is no default model. If you omit speech_model, the request will fail.
Recommended model: We recommend Universal-3 Pro Streaming as your primary model for streaming transcription. It provides the highest accuracy with sub-300ms latency, native multilingual code switching, and advanced prompting support — ideal for voice agents and real-time applications.
Streaming is billed per session: All streaming models are billed on the total duration that your WebSocket connection stays open, not on the amount of audio you send. Always send a Terminate message when you’re done with a stream — sessions that aren’t closed auto-close after 3 hours and are billed for the full duration. See Billing and pricing for details.

Available models

| Name | Parameter | Description | Best for |
| --- | --- | --- | --- |
| Universal-3 Pro Streaming | `"speech_model": "u3-rt-pro"` | The most accurate model with the fastest word emissions for voice agents that demand the highest quality. Best-in-class accuracy with advanced prompting capabilities. Supports EN, ES, DE, FR, PT, IT. | Real-time voice agents needing premium accuracy, elite entity accuracy, IVR replacement, agent assist, multilingual code-switching |
| Universal-Streaming English | `"speech_model": "universal-streaming-english"` | An English transcription model offering a good balance of speed and cost-effectiveness. | Cost-effective English real-time transcription, English-only real-time apps |
| Universal-Streaming Multilingual | `"speech_model": "universal-streaming-multilingual"` | A multilingual transcription model offering a good balance of speed and cost-effectiveness. Supports EN, ES, DE, FR, PT, IT. | Cost-effective multilingual streaming across EN/ES/DE/FR/PT/IT |
| Whisper Streaming | `"speech_model": "whisper-rt"` | An open-source Whisper model enhanced with AssemblyAI’s reliable infrastructure and unlimited scale. Supports 99+ languages at an accessible price point. | Language coverage beyond 6 languages, open-source model preference, cost-sensitive multilingual transcription |

Choosing a model

| Feature | Universal-3 Pro Streaming | Universal-Streaming English | Universal-Streaming Multilingual | Whisper Streaming |
| --- | --- | --- | --- | --- |
| Latency | Fast | Fastest | Fast | Moderate |
| Partial transcripts | Yes | Yes | Yes | Yes |
| Multilingual | Native Code Switching | No | Per Turn | 99+ languages (auto-detected) |
| Entity accuracy | Best | Okay | Okay | Okay |
| Disfluencies & filler words | Yes | No | No | No |
| Language detection | Yes | No | Yes | Yes (with confidence scores) |
| Non-speech tags | No | No | No | Yes ([Silence], [Music], etc.) |
| Customization | Keyterms prompting (known context) + Native prompting (unknown context) | Keyterms prompting (known context) | Keyterms prompting (known context) | No |
For detailed setup and configuration of Universal-3 Pro Streaming, see the Universal-3 Pro Streaming page. For prompting guidance, see the Prompting guide. For detailed setup and configuration of Whisper Streaming, see the Whisper Streaming page.

End-to-end example

You can select a model by setting the speech_model connection parameter when connecting to the streaming API:
import pyaudio
import websocket
import json
import threading
import time
from urllib.parse import urlencode

# Placeholder credential; run() sends it as the Authorization header.
YOUR_API_KEY = "<YOUR_API_KEY>"

# Query-string parameters appended to the WebSocket URL when connecting.
CONNECTION_PARAMS = {
    "sample_rate": 16000,
    # Other accepted values: "universal-streaming-english",
    # "universal-streaming-multilingual", "whisper-rt".
    "speech_model": "u3-rt-pro",
    "min_turn_silence": 100,
    "max_turn_silence": 1000,
    # Optional: add "format_turns": True to return formatted final
    # transcripts (not applicable to u3-rt-pro).
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = "?".join((API_ENDPOINT_BASE_URL, urlencode(CONNECTION_PARAMS)))

# Microphone capture settings; the capture rate follows the rate sent to the API.
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
FRAMES_PER_BUFFER = 800  # 800 frames at 16000 Hz = 50 ms of audio per chunk
CHANNELS = 1
FORMAT = pyaudio.paInt16

# State shared between run() and the WebSocket callbacks.
stop_event = threading.Event()  # set to tell the audio sender loop to exit
audio = None
stream = None
ws_app = None
audio_thread = None

def on_open(ws):
    """Announce the open connection and start forwarding microphone audio.

    Args:
        ws: The open WebSocket connection; audio chunks are sent over it.

    Side effects:
        Stores the started sender thread in the module-level ``audio_thread``.
    """
    global audio_thread

    print("WebSocket connection opened.")

    def stream_audio():
        """Send microphone chunks as binary frames until told to stop."""
        while True:
            if stop_event.is_set():
                return
            try:
                chunk = stream.read(
                    FRAMES_PER_BUFFER, exception_on_overflow=False
                )
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as e:
                # Any read/send failure ends the sender loop.
                print(f"Error streaming audio: {e}")
                return

    # Daemon thread: it must not keep the process alive on exit.
    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()

def on_message(ws, message):
    """Decode one JSON message from the server and print it.

    Handles three message types:
      * ``Begin``: prints the session id.
      * ``Turn``: prints the transcript. Partial turns overwrite the current
        terminal line; a finished turn blanks the line, prints the transcript
        and moves to the next line.
      * ``Termination``: prints the audio duration reported by the server.

    Other message types are ignored. Any exception raised while decoding or
    printing is reported on stdout rather than propagated.

    Args:
        ws: The WebSocket connection (unused).
        message: JSON-encoded message payload.
    """
    try:
        event = json.loads(message)
        kind = event.get("type")

        if kind == "Turn":
            text = event.get("transcript", "")
            if not event.get("end_of_turn", False):
                # Partial result: stay on the same line so the next update
                # overwrites it.
                print(f"\r{text}", end="")
            else:
                # Final result: blank the line, then print and end the line.
                print(f"\r{' ' * 80}\r{text}")
        elif kind == "Begin":
            print(f"Session began: ID={event.get('id')}")
        elif kind == "Termination":
            print(f"\nSession terminated: {event.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        print(f"Error handling message: {e}")

def on_error(ws, error):
    """Report a WebSocket error and signal the audio sender to stop.

    Args:
        ws: The WebSocket connection (unused).
        error: The error to report; it is interpolated into the printed message.
    """
    print(f"\nWebSocket Error: {error}")
    # The sender loop started in on_open checks this event on every iteration.
    stop_event.set()

def on_close(ws, close_status_code, close_msg):
    """Report the disconnect and release the microphone resources.

    Args:
        ws: The WebSocket connection (unused).
        close_status_code: Close status code; printed in the status line.
        close_msg: Close reason (unused).

    Side effects:
        Sets ``stop_event``, waits briefly for the audio sender thread to
        exit, closes the module-level ``stream`` and terminates ``audio``,
        then resets both globals to ``None`` so a repeated call does nothing.
    """
    global stream, audio

    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # Give the sender thread a chance to leave stream.read() before the
    # stream is closed underneath it. Skip the wait if no sender was started
    # or if this callback is somehow running on the sender thread itself.
    sender = audio_thread
    if (
        sender is not None
        and sender is not threading.current_thread()
        and sender.is_alive()
    ):
        sender.join(timeout=1.0)

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        stream = None  # prevents a second call from touching a closed stream
    if audio:
        audio.terminate()
        audio = None

def run():
    """Stream microphone audio to the streaming API until interrupted.

    Opens the microphone, connects the WebSocket on a background thread and
    waits until that thread ends or the user presses Ctrl+C. On Ctrl+C it
    sends a ``Terminate`` message, gives the connection up to two seconds to
    close, then closes it.

    Raises:
        Exception: Whatever ``audio.open`` raises if the input stream cannot
            be opened; the PyAudio instance is terminated first.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception:
        # Do not leak the PyAudio instance when the input stream cannot open.
        audio.terminate()
        audio = None
        raise
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # run_forever blocks, so it runs on its own daemon thread while this
    # thread stays free to receive Ctrl+C.
    ws_thread = threading.Thread(target=ws_app.run_forever)
    ws_thread.daemon = True
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
            except (websocket.WebSocketException, OSError) as e:
                # The socket can close between the connected check and the send.
                print(f"Error sending Terminate: {e}")
            else:
                # Wait up to 2 s for the connection to close; this returns
                # early when the WebSocket thread finishes sooner.
                ws_thread.join(timeout=2.0)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)

# Start the streaming example only when this file is executed as a script.
if __name__ == "__main__":
    run()