Translate Streaming STT Transcripts with LLM Gateway

In this guide, you’ll learn how to implement real-time translation of final transcripts using AssemblyAI’s Streaming API and LLM Gateway.

Quickstart

1import pyaudio
2import websocket
3import json
4import threading
5import time
6import requests
7from urllib.parse import urlencode
8
# AssemblyAI API key sent with both the streaming and LLM Gateway requests.
YOUR_API_KEY = "YOUR_API_KEY"  # Replace with your actual API key

# Microphone capture settings. 800 frames at 16 kHz is 50 ms of audio per read.
SAMPLE_RATE = 16000
FRAMES_PER_BUFFER = 800
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Query parameters appended to the streaming WebSocket URL.
CONNECTION_PARAMS = {
    "sample_rate": SAMPLE_RATE,
    "speech_model": "u3-rt-pro",
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# Shared runtime state, populated by run() and on_open().
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()
28
def translate_text(text):
    """Translate a final transcript into Spanish using LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The translated text, taken from the first choice of the
        chat-completions response.

    Raises:
        requests.RequestException: If the request fails, times out, or
            LLM Gateway answers with an HTTP error status.
    """
    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    # Without a timeout a stalled request would block the caller forever;
    # this function is called from the WebSocket message handler.
    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        timeout=30,
    )
    # Surface HTTP errors directly instead of failing below with a
    # KeyError on a missing "choices" field.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]
49
def on_open(ws):
    """Announce the open connection and start forwarding microphone audio.

    A daemon thread reads chunks from the module-level ``stream`` and sends
    each one to ``ws`` as a binary frame. The loop ends when ``stop_event``
    is set or when reading/sending raises.
    """
    global audio_thread

    print("WebSocket connection opened.")

    def stream_audio():
        while True:
            if stop_event.is_set():
                return
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as e:
                print(f"Error streaming audio: {e}")
                return

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()
66
def on_message(ws, message):
    """Handle one JSON message received from the streaming WebSocket.

    - ``Begin``: prints the session ID.
    - ``Turn``: prints the partial transcript in place, or, when
      ``end_of_turn`` is set, clears the line and prints the translation
      of the final transcript.
    - ``Termination``: prints the reported audio duration.

    Any exception raised while handling the message is caught and printed
    so that one bad message does not stop the handler.

    Args:
        ws: The WebSocket connection (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            # Treat a missing or null transcript as empty text.
            transcript = data.get("transcript") or ""
            if data.get("end_of_turn"):
                # Wipe the partial transcript that was printed in place.
                print(f"\r{' ' * 80}\r", end="")
                # Skip blank final transcripts: translating nothing costs a
                # request and can yield text that was never spoken.
                if transcript.strip():
                    print(translate_text(transcript))
            else:
                print(f"\r{transcript}", end="")
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        print(f"Error handling message: {e}")
85
def on_error(ws, error):
    """Print a WebSocket error and signal the audio loop to stop."""
    report = f"\nWebSocket Error: {error}"
    print(report)
    stop_event.set()
89
def on_close(ws, close_status_code, close_msg):
    """Release audio resources after the WebSocket disconnects.

    Sets ``stop_event``, waits briefly for the capture thread started in
    ``on_open`` to finish, then stops and closes the microphone stream and
    terminates PyAudio.

    Args:
        ws: The WebSocket connection (unused).
        close_status_code: Close status reported for the connection.
        close_msg: Close message reported for the connection (unused).
    """
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # The capture thread may still be inside stream.read(); give it a moment
    # to see stop_event before the stream is closed underneath it.
    worker = audio_thread
    if worker is not None and worker is not threading.current_thread():
        worker.join(timeout=1.0)

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
    if audio:
        audio.terminate()
100
def run():
    """Open the microphone, connect to the streaming API, and wait.

    The WebSocket client runs on a daemon thread while this function polls
    until that thread ends. On Ctrl+C it sets ``stop_event``, sends a
    ``Terminate`` message if the socket is still connected, closes the
    connection, and waits briefly for the WebSocket thread to finish.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    stream = audio.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=SAMPLE_RATE,
        input=True,
        frames_per_buffer=FRAMES_PER_BUFFER,
    )
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    ws_thread.start()

    try:
        # Poll rather than join() so the main thread stays responsive to Ctrl+C.
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app and ws_app.sock and ws_app.sock.connected:
            terminate_payload = json.dumps({"type": "Terminate"})
            ws_app.send(terminate_payload)
            # Pause before closing so the server has time to respond.
            time.sleep(2)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()

Step-by-Step Instructions

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.

Install Dependencies

$ pip install websocket-client pyaudio requests

Import Packages & Set API Key

1import pyaudio
2import websocket
3import json
4import threading
5import time
6import requests
7from urllib.parse import urlencode
8
# Replace the placeholder below with your actual API key.
YOUR_API_KEY = "YOUR_API_KEY"

Audio Configuration & Global Variables

Set all of your audio configurations and global variables.

# Microphone capture settings. 800 frames at 16 kHz is 50 ms of audio per read.
SAMPLE_RATE = 16000
FRAMES_PER_BUFFER = 800
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Query parameters appended to the streaming WebSocket URL.
CONNECTION_PARAMS = {
    "sample_rate": SAMPLE_RATE,
    "speech_model": "u3-rt-pro",
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# Shared runtime state, populated by run() and on_open().
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()

Define Translate Text Function

Define a function called translate_text (Python) or translateText (JavaScript) that uses LLM Gateway to translate the final English transcripts into another language. This example translates the text into Spanish. To use a different language, replace “Spanish” in the prompt with your language of choice.

def translate_text(text):
    """Translate a final transcript into Spanish using LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The translated text, taken from the first choice of the
        chat-completions response.

    Raises:
        requests.RequestException: If the request fails, times out, or
            LLM Gateway answers with an HTTP error status.
    """
    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    # Without a timeout a stalled request would block the caller forever;
    # this function is called from the WebSocket message handler.
    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        timeout=30,
    )
    # Surface HTTP errors directly instead of failing below with a
    # KeyError on a missing "choices" field.
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]

Websocket Event Handlers

Open Websocket

def on_open(ws):
    """Announce the open connection and start forwarding microphone audio.

    A daemon thread reads chunks from the module-level ``stream`` and sends
    each one to ``ws`` as a binary frame. The loop ends when ``stop_event``
    is set or when reading/sending raises.
    """
    global audio_thread

    print("WebSocket connection opened.")

    def stream_audio():
        while True:
            if stop_event.is_set():
                return
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as e:
                print(f"Error streaming audio: {e}")
                return

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()

Handle Websocket Messages

In this function, use the previously defined translate_text / translateText to translate all final transcripts.

def on_message(ws, message):
    """Handle one JSON message received from the streaming WebSocket.

    - ``Begin``: prints the session ID.
    - ``Turn``: prints the partial transcript in place, or, when
      ``end_of_turn`` is set, clears the line and prints the translation
      of the final transcript.
    - ``Termination``: prints the reported audio duration.

    Any exception raised while handling the message is caught and printed
    so that one bad message does not stop the handler.

    Args:
        ws: The WebSocket connection (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            # Treat a missing or null transcript as empty text.
            transcript = data.get("transcript") or ""
            if data.get("end_of_turn"):
                # Wipe the partial transcript that was printed in place.
                print(f"\r{' ' * 80}\r", end="")
                # Skip blank final transcripts: translating nothing costs a
                # request and can yield text that was never spoken.
                if transcript.strip():
                    print(translate_text(transcript))
            else:
                print(f"\r{transcript}", end="")
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        print(f"Error handling message: {e}")

Close Websocket

def on_close(ws, close_status_code, close_msg):
    """Release audio resources after the WebSocket disconnects.

    Sets ``stop_event``, waits briefly for the capture thread started in
    ``on_open`` to finish, then stops and closes the microphone stream and
    terminates PyAudio.

    Args:
        ws: The WebSocket connection (unused).
        close_status_code: Close status reported for the connection.
        close_msg: Close message reported for the connection (unused).
    """
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # The capture thread may still be inside stream.read(); give it a moment
    # to see stop_event before the stream is closed underneath it.
    worker = audio_thread
    if worker is not None and worker is not threading.current_thread():
        worker.join(timeout=1.0)

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
    if audio:
        audio.terminate()

Websocket Error Handling

def on_error(ws, error):
    """Print a WebSocket error and signal the audio loop to stop."""
    report = f"\nWebSocket Error: {error}"
    print(report)
    stop_event.set()

Begin Streaming STT Transcription

def run():
    """Open the microphone, connect to the streaming API, and wait.

    The WebSocket client runs on a daemon thread while this function polls
    until that thread ends. On Ctrl+C it sets ``stop_event``, sends a
    ``Terminate`` message if the socket is still connected, closes the
    connection, and waits briefly for the WebSocket thread to finish.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    stream = audio.open(
        format=FORMAT,
        channels=CHANNELS,
        rate=SAMPLE_RATE,
        input=True,
        frames_per_buffer=FRAMES_PER_BUFFER,
    )
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    ws_thread.start()

    try:
        # Poll rather than join() so the main thread stays responsive to Ctrl+C.
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app and ws_app.sock and ws_app.sock.connected:
            terminate_payload = json.dumps({"type": "Terminate"})
            ws_app.send(terminate_payload)
            # Pause before closing so the server has time to respond.
            time.sleep(2)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()