Translate Streaming STT Transcripts with LLM Gateway
In this guide, you’ll learn how to implement real-time translation of final transcripts using AssemblyAI’s Streaming API and LLM Gateway.
Quickstart
Python
JavaScript
import json
import os
import threading
import time
from urllib.parse import urlencode

import pyaudio
import requests
import websocket

# Prefer the ASSEMBLYAI_API_KEY environment variable so the key does not have
# to be written into the script; otherwise replace the placeholder below.
YOUR_API_KEY = os.environ.get("ASSEMBLYAI_API_KEY", "YOUR_API_KEY")  # Replace with your actual API key

CONNECTION_PARAMS = {
    "sample_rate": 16000,
    "speech_model": "u3-rt-pro",
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = f"{API_ENDPOINT_BASE_URL}?{urlencode(CONNECTION_PARAMS)}"

FRAMES_PER_BUFFER = 800  # 800 frames at 16000 Hz = 50 ms of audio per chunk
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Shared state used by the WebSocket callbacks and run().
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()


def translate_text(text):
    """Translate a final transcript into Spanish using LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The content of the first completion choice, or an empty string when
        ``text`` is empty or only whitespace (no request is sent).

    Raises:
        requests.HTTPError: If LLM Gateway responds with an error status.
        requests.Timeout: If the request does not finish within 30 seconds.
    """
    if not text or not text.strip():
        return ""

    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    # This runs inside the WebSocket message callback, so a request without a
    # timeout could block transcript handling indefinitely.
    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        timeout=30,
    )
    # Surface HTTP errors directly instead of a KeyError on "choices".
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]


def on_open(ws):
    """Start a daemon thread that streams microphone audio to the WebSocket."""
    global audio_thread
    print("WebSocket connection opened.")

    def stream_audio():
        """Send microphone chunks as binary frames until stop is requested."""
        while not stop_event.is_set():
            try:
                audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
            except Exception as e:
                print(f"Error streaming audio: {e}")
                break

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()


def on_message(ws, message):
    """Handle one JSON message from the streaming session.

    Partial turns are shown in place on the current line; final turns
    (``end_of_turn`` set) are translated and printed on their own line.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            transcript = data.get("transcript") or ""
            if data.get("end_of_turn"):
                # Clear the in-progress line before printing the final result.
                print(f"\r{' ' * 80}\r", end="", flush=True)
                # Skip the LLM call when there is nothing to translate.
                if transcript.strip():
                    print(translate_text(transcript))
            else:
                # No newline is written, so flush to make the partial visible.
                print(f"\r{transcript}", end="", flush=True)
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        print(f"Error handling message: {e}")


def on_error(ws, error):
    """Report a WebSocket error and signal the audio loop to stop."""
    print(f"\nWebSocket Error: {error}")
    stop_event.set()


def on_close(ws, close_status_code, close_msg):
    """Stop audio streaming and release the microphone when the socket closes."""
    global stream, audio
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # Give the audio thread a moment to leave its read loop so the stream is
    # not closed while a read is still in progress.
    if (
        audio_thread
        and audio_thread is not threading.current_thread()
        and audio_thread.is_alive()
    ):
        audio_thread.join(timeout=1.0)

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        stream = None  # makes a repeated call a no-op
    if audio:
        audio.terminate()
        audio = None


def run():
    """Open the microphone, connect to the Streaming API, and run until stopped.

    Blocks until the WebSocket thread exits or the user presses Ctrl+C, in
    which case a ``Terminate`` message is sent before the socket is closed.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception:
        # Release the PyAudio instance if the microphone cannot be opened.
        audio.terminate()
        audio = None
        raise
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
            except Exception as e:
                # The connection may have dropped between the check and the send.
                print(f"Error sending termination message: {e}")
            else:
                # Wait up to 2 seconds for the session to wind down; returns
                # early if the WebSocket thread finishes sooner.
                ws_thread.join(timeout=2.0)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()
Step-by-Step Instructions
Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.
Install Dependencies
Python
JavaScript
$ pip install websocket-client pyaudio requests
Import Packages & Set API Key
Python
JavaScript
import json
import os
import threading
import time
from urllib.parse import urlencode

import pyaudio
import requests
import websocket

# Prefer the ASSEMBLYAI_API_KEY environment variable so the key does not have
# to be written into the script; otherwise replace the placeholder below.
YOUR_API_KEY = os.environ.get("ASSEMBLYAI_API_KEY", "YOUR_API_KEY")  # Replace with your actual API key
Audio Configuration & Global Variables
Set all of your audio configurations and global variables.
Python
JavaScript
# Streaming endpoint and the query parameters appended to it.
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
CONNECTION_PARAMS = {"sample_rate": 16000, "speech_model": "u3-rt-pro"}
API_ENDPOINT = API_ENDPOINT_BASE_URL + "?" + urlencode(CONNECTION_PARAMS)

# Microphone capture settings; the sample rate is the one sent to the API.
SAMPLE_RATE = CONNECTION_PARAMS["sample_rate"]
FRAMES_PER_BUFFER = 800  # 800 frames at 16000 Hz = 50 ms of audio per chunk
CHANNELS = 1
FORMAT = pyaudio.paInt16

# Shared state, assigned in run() and on_open() and read by the callbacks.
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()  # set to ask the audio loop to stop
Define Translate Text Function
Define a function called translate_text (Python) or translateText (JavaScript) that uses LLM Gateway to translate final English transcripts into another language. This example translates the text into Spanish. To target a different language, replace “Spanish” in the prompt with your language of choice.
Python
JavaScript
def translate_text(text):
    """Translate a final transcript into Spanish using LLM Gateway.

    Args:
        text: The transcript text to translate.

    Returns:
        The content of the first completion choice, or an empty string when
        ``text`` is empty or only whitespace (no request is sent).

    Raises:
        requests.HTTPError: If LLM Gateway responds with an error status.
        requests.Timeout: If the request does not finish within 30 seconds.
    """
    if not text or not text.strip():
        return ""

    headers = {
        "authorization": YOUR_API_KEY
    }

    llm_gateway_data = {
        "model": "gemini-2.5-flash-lite",
        "messages": [
            {"role": "user", "content": f"Translate the following text into Spanish. Do not write a preamble. Just return the translated text.\n\nText: {text}"}
        ],
        "max_tokens": 1000
    }

    # This is called from the WebSocket message callback, so a request without
    # a timeout could block transcript handling indefinitely.
    result = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data,
        timeout=30,
    )
    # Surface HTTP errors directly instead of a KeyError on "choices".
    result.raise_for_status()
    return result.json()["choices"][0]["message"]["content"]
Websocket Event Handlers
Open Websocket
Python
JavaScript
def on_open(ws):
    """Start a daemon thread that streams microphone audio to the WebSocket."""
    global audio_thread
    print("WebSocket connection opened.")

    def stream_audio():
        """Send microphone chunks as binary frames until stop is requested."""
        while not stop_event.is_set():
            try:
                chunk = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                ws.send(chunk, websocket.ABNF.OPCODE_BINARY)
            except Exception as e:
                # Any read or send failure ends the streaming loop.
                print(f"Error streaming audio: {e}")
                return

    audio_thread = threading.Thread(target=stream_audio, daemon=True)
    audio_thread.start()
Handle Websocket Messages
In this function, use the previously defined translate_text / translateText to translate all final transcripts.
Python
JavaScript
def on_message(ws, message):
    """Handle one JSON message from the streaming session.

    Partial turns are shown in place on the current line; final turns
    (``end_of_turn`` set) are translated with ``translate_text`` and printed
    on their own line. Any exception is caught and reported so that one bad
    message does not stop the callback from handling later ones.

    Args:
        ws: The WebSocket app that received the message (unused).
        message: The raw JSON text of the message.
    """
    try:
        data = json.loads(message)
        msg_type = data.get("type")

        if msg_type == "Begin":
            print(f"Session began: ID={data.get('id')}")
        elif msg_type == "Turn":
            transcript = data.get("transcript") or ""
            if data.get("end_of_turn"):
                # Clear the in-progress line before printing the final result.
                print(f"\r{' ' * 80}\r", end="", flush=True)
                # Skip the LLM call when there is nothing to translate.
                if transcript.strip():
                    print(translate_text(transcript))
            else:
                # No newline is written, so flush to make the partial visible.
                print(f"\r{transcript}", end="", flush=True)
        elif msg_type == "Termination":
            print(f"\nSession terminated: {data.get('audio_duration_seconds', 0)}s of audio")
    except Exception as e:
        print(f"Error handling message: {e}")
Close Websocket
Python
JavaScript
def on_close(ws, close_status_code, close_msg):
    """Stop audio streaming and release the microphone when the socket closes.

    Args:
        ws: The WebSocket app that closed (unused).
        close_status_code: Status code reported for the close; printed.
        close_msg: Close message reported for the close (unused).
    """
    global stream, audio
    print(f"\nWebSocket Disconnected: Status={close_status_code}")
    stop_event.set()

    # Give the audio thread a moment to leave its read loop so the stream is
    # not closed while a read is still in progress.
    if (
        audio_thread
        and audio_thread is not threading.current_thread()
        and audio_thread.is_alive()
    ):
        audio_thread.join(timeout=1.0)

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        stream = None  # makes a repeated call a no-op
    if audio:
        audio.terminate()
        audio = None
Websocket Error Handling
Python
JavaScript
def on_error(ws, error):
    """Report a WebSocket error and signal the audio loop to stop.

    Args:
        ws: The WebSocket app that raised the error (unused).
        error: The error to report.
    """
    report = f"\nWebSocket Error: {error}"
    print(report)
    stop_event.set()
Begin Streaming STT Transcription
Python
JavaScript
def run():
    """Open the microphone, connect to the Streaming API, and run until stopped.

    Blocks until the WebSocket thread exits or the user presses Ctrl+C, in
    which case a ``Terminate`` message is sent before the socket is closed.

    Raises:
        Exception: Whatever ``audio.open`` raises if the microphone cannot be
            opened; the PyAudio instance is released before re-raising.
    """
    global audio, stream, ws_app

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
        )
    except Exception:
        # Release the PyAudio instance if the microphone cannot be opened.
        audio.terminate()
        audio = None
        raise
    print("Speak into your microphone. Press Ctrl+C to stop.")

    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": YOUR_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run the WebSocket in a background thread so Ctrl+C reaches this thread.
    ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nStopping...")
        stop_event.set()
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
            except Exception as e:
                # The connection may have dropped between the check and the send.
                print(f"Error sending termination message: {e}")
            else:
                # Wait up to 2 seconds for the session to wind down; returns
                # early if the WebSocket thread finishes sooner.
                ws_thread.join(timeout=2.0)
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)


if __name__ == "__main__":
    run()