Transcribe audio files with Streaming
This guide shows you how to transcribe WAV audio files with varying sample rates using our Streaming API.
Quickstart
Here is the complete Python script to transcribe a WAV audio file using the Streaming API.
"""Stream a local WAV file to AssemblyAI's v3 Streaming API.

Reads the file in ~50 ms chunks, sends them over a WebSocket, prints
partial/final transcripts as they arrive, and optionally plays the audio
and saves the transcript to a text file.

Requires the ASSEMBLYAI_API_KEY environment variable and the
``websocket-client`` package (``pyaudio`` is optional, for playback).
"""
import websocket
import json
import threading
import time
import wave
import sys
import os
from urllib.parse import urlencode
from pathlib import Path

# --- Configuration ---
ASSEMBLYAI_API_KEY = os.environ["ASSEMBLYAI_API_KEY"]
AUDIO_FILE = "audio.wav"  # Path to your audio file
SAMPLE_RATE = 48000  # Change to match the sample rate of your audio file
SAVE_TRANSCRIPT_TO_FILE = True  # Set to False to disable saving transcript to file
PLAY_AUDIO = True  # Set to False to disable audio playback

CONNECTION_PARAMS = {
    "speech_model": "u3-rt-pro",
    "sample_rate": SAMPLE_RATE,
}
API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws"
API_ENDPOINT = f"{API_ENDPOINT_BASE_URL}?{urlencode(CONNECTION_PARAMS)}"

# Global variables
ws_app = None
audio_thread = None
stop_event = threading.Event()  # signals the streaming thread to stop

# Track session data for output file
session_data = {
    "session_id": None,              # filled in from the "Begin" message
    "audio_file": AUDIO_FILE,
    "audio_duration_seconds": None,  # filled in from the "Termination" message
    "turns": []                      # finalized transcript turns, in order
}

# --- Helper Functions ---

def validate_audio_file(filepath, sample_rate):
    """Validate audio file before streaming.

    Exits the process with status 1 (after printing an ffmpeg conversion
    hint to stderr) unless *filepath* is a mono WAV file whose frame rate
    matches *sample_rate*.
    """
    file_ext = Path(filepath).suffix.lower()
    if file_ext != ".wav":
        print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr)
        print(f"Convert your file to WAV using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
        sys.exit(1)

    with wave.open(filepath, 'rb') as wav_file:
        if wav_file.getnchannels() != 1:
            print("Error: Only mono audio is supported", file=sys.stderr)
            print(f"Convert your file to mono using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
            sys.exit(1)

        file_sample_rate = wav_file.getframerate()
        if file_sample_rate != sample_rate:
            print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr)
            print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr)
            sys.exit(1)


def save_transcript():
    """Save the transcript to a file in the same directory as the script."""
    audio_name = Path(session_data["audio_file"]).stem
    session_id = session_data["session_id"] or "unknown"
    output_file = f"{audio_name}_{session_id}.txt"

    with open(output_file, "w") as f:
        f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n")
        f.write(f"Audio file: {session_data['audio_file']}\n")
        f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n")
        f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/universal-3-pro-streaming/universal-3-pro-streaming#request.query\n\n")
        f.write("\nTranscription Output\n")
        for i, turn in enumerate(session_data["turns"], 1):
            f.write(f"[Turn #{i}]: {turn}\n")

    print(f"Transcript saved to {output_file}")


# --- WebSocket Event Handlers ---

def on_open(ws):
    """Called when the WebSocket connection is established."""
    print("WebSocket connection opened.")
    print(f"Connected to: {API_ENDPOINT}")

    def stream_file():
        # Send the WAV payload in ~50 ms slices, pacing either by audio
        # playback (when pyaudio is available) or by sleeping between sends.
        chunk_duration = 0.05  # 50ms chunks
        audio_player = None

        if PLAY_AUDIO:
            try:
                import pyaudio
                p = pyaudio.PyAudio()
                with wave.open(AUDIO_FILE, 'rb') as wav_file:
                    audio_player = p.open(
                        format=p.get_format_from_width(wav_file.getsampwidth()),
                        channels=wav_file.getnchannels(),
                        rate=wav_file.getframerate(),
                        output=True
                    )
            except ImportError:
                print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr)
                print("Install with: pip install pyaudio", file=sys.stderr)

        try:
            with wave.open(AUDIO_FILE, 'rb') as wav_file:
                frames_per_chunk = int(SAMPLE_RATE * chunk_duration)

                while not stop_event.is_set():
                    frames = wav_file.readframes(frames_per_chunk)
                    if not frames:
                        break

                    if audio_player:
                        audio_player.write(frames)  # playback paces the stream
                    else:
                        time.sleep(chunk_duration)  # simulate real-time pacing

                    ws.send(frames, websocket.ABNF.OPCODE_BINARY)
        finally:
            if audio_player:
                audio_player.stop_stream()
                audio_player.close()
                p.terminate()

        # All audio sent - terminate the session
        print("File streaming complete. Waiting for final transcripts...")
        try:
            ws.send(json.dumps({"type": "Terminate"}))
        except Exception:
            pass

    global audio_thread
    audio_thread = threading.Thread(target=stream_file)
    audio_thread.daemon = True
    audio_thread.start()


def on_message(ws, message):
    """Handle a JSON message from the server: Begin, Turn, or Termination."""
    try:
        data = json.loads(message)
        msg_type = data.get('type')

        if msg_type == "Begin":
            session_data["session_id"] = data.get('id')
            print(f"Session ID: {data.get('id')}\n")
        elif msg_type == "Turn":
            transcript = data.get('transcript', '')
            if not transcript:
                return

            if data.get('end_of_turn'):
                print(f"[Final]: {transcript}\n")
                session_data["turns"].append(transcript)
            else:
                print(f"[Partial]: {transcript}")
        elif msg_type == "Termination":
            session_data["audio_duration_seconds"] = data.get('audio_duration_seconds', 0)
            print(f"Session terminated: {data.get('audio_duration_seconds', 0)} seconds of audio processed")
    except json.JSONDecodeError as e:
        print(f"Error decoding message: {e}")
    except Exception as e:
        print(f"Error handling message: {e}")


def on_error(ws, error):
    """Called when a WebSocket error occurs."""
    print(f"\nWebSocket Error: {error}")
    stop_event.set()


def on_close(ws, close_status_code, close_msg):
    """Called when the WebSocket connection is closed."""
    print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}")
    stop_event.set()

    if SAVE_TRANSCRIPT_TO_FILE:
        save_transcript()

    if audio_thread and audio_thread.is_alive():
        audio_thread.join(timeout=1.0)


# --- Main Execution ---
def run():
    """Validate the file, open the WebSocket, and block until the session ends."""
    global ws_app

    # Validate audio file before connecting
    validate_audio_file(AUDIO_FILE, SAMPLE_RATE)

    # Create WebSocketApp
    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": ASSEMBLYAI_API_KEY},
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run WebSocketApp in a separate thread
    ws_thread = threading.Thread(target=ws_app.run_forever)
    ws_thread.daemon = True
    ws_thread.start()

    try:
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nCtrl+C received. Stopping...")
        stop_event.set()

        # Ask the server to finalize the session before closing the socket.
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                ws_app.send(json.dumps({"type": "Terminate"}))
                time.sleep(2)  # allow final transcripts to arrive
            except Exception as e:
                print(f"Error sending termination message: {e}")

        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)

    except Exception as e:
        print(f"\nAn unexpected error occurred: {e}")
        stop_event.set()
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)

    finally:
        print("Cleanup complete. Exiting.")


if __name__ == "__main__":
    run()
Step-by-step guide
Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up and get your API key from your dashboard.
Install and import packages
Install the required packages. PyAudio is optional — only needed for audio playback during streaming.
$ pip install websocket-client
$ pip install pyaudio
Import packages.
1 import websocket 2 import json 3 import os 4 import threading 5 import time 6 import wave 7 import sys 8 from urllib.parse import urlencode 9 from pathlib import Path
Configure settings
Set your ASSEMBLYAI_API_KEY environment variable.
Set AUDIO_FILE to the relative or absolute path of your audio file, and set SAMPLE_RATE to match your file’s sample rate.
1 ASSEMBLYAI_API_KEY = os.environ["ASSEMBLYAI_API_KEY"] 2 AUDIO_FILE = "audio.wav" # Path to your audio file 3 SAMPLE_RATE = 48000 # Change to match the sample rate of your audio file 4 SAVE_TRANSCRIPT_TO_FILE = True # Set to False to disable saving transcript to file 5 PLAY_AUDIO = True # Set to False to disable audio playback 6 7 CONNECTION_PARAMS = { 8 "speech_model": "u3-rt-pro", 9 "sample_rate": SAMPLE_RATE, 10 } 11 API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws" 12 API_ENDPOINT = f"{API_ENDPOINT_BASE_URL}?{urlencode(CONNECTION_PARAMS)}" 13 14 # Global variables 15 ws_app = None 16 audio_thread = None 17 stop_event = threading.Event() 18 19 # Track session data for output file 20 session_data = { 21 "session_id": None, 22 "audio_file": AUDIO_FILE, 23 "audio_duration_seconds": None, 24 "turns": [] 25 }
Helper functions
The following helper functions are used to validate audio files and save the transcript output:
- `validate_audio_file()` — Validates that the audio file is a mono WAV file with the expected sample rate.
- `save_transcript()` — Saves the transcript to a text file after the session ends.
1 def validate_audio_file(filepath, sample_rate): 2 """Validate audio file before streaming.""" 3 file_ext = Path(filepath).suffix.lower() 4 if file_ext != ".wav": 5 print(f"Error: Only WAV files are supported. Got: {file_ext}", file=sys.stderr) 6 print(f"Convert your file to WAV using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr) 7 sys.exit(1) 8 9 with wave.open(filepath, 'rb') as wav_file: 10 if wav_file.getnchannels() != 1: 11 print("Error: Only mono audio is supported", file=sys.stderr) 12 print(f"Convert your file to mono using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr) 13 sys.exit(1) 14 15 file_sample_rate = wav_file.getframerate() 16 if file_sample_rate != sample_rate: 17 print(f"Error: File sample rate ({file_sample_rate}) doesn't match expected rate ({sample_rate})", file=sys.stderr) 18 print(f"Either update SAMPLE_RATE to {file_sample_rate}, or convert your file using: ffmpeg -i {filepath} -ar {sample_rate} -ac 1 output.wav", file=sys.stderr) 19 sys.exit(1) 20 21 22 def save_transcript(): 23 """Save the transcript to a file in the same directory as the script.""" 24 audio_name = Path(session_data["audio_file"]).stem 25 session_id = session_data["session_id"] or "unknown" 26 output_file = f"{audio_name}_{session_id}.txt" 27 28 with open(output_file, "w") as f: 29 f.write(f"AssemblyAI Session ID: {session_data['session_id']}\n") 30 f.write(f"Audio file: {session_data['audio_file']}\n") 31 f.write(f"Audio duration: {session_data['audio_duration_seconds']} seconds\n") 32 f.write("See all available parameters and defaults at https://www.assemblyai.com/docs/api-reference/streaming-api/universal-3-pro-streaming/universal-3-pro-streaming#request.query\n\n") 33 f.write("\nTranscription Output\n") 34 for i, turn in enumerate(session_data["turns"], 1): 35 f.write(f"[Turn #{i}]: {turn}\n") 36 37 print(f"Transcript saved to {output_file}")
WebSocket event handlers
Open WebSocket and stream audio file
When the connection opens, we start a background thread that reads the WAV file in 50ms chunks and sends them over the WebSocket. If PLAY_AUDIO is enabled, the audio is also played through your speakers.
1 def on_open(ws): 2 """Called when the WebSocket connection is established.""" 3 print("WebSocket connection opened.") 4 print(f"Connected to: {API_ENDPOINT}") 5 6 def stream_file(): 7 chunk_duration = 0.05 # 50ms chunks 8 audio_player = None 9 10 if PLAY_AUDIO: 11 try: 12 import pyaudio 13 p = pyaudio.PyAudio() 14 with wave.open(AUDIO_FILE, 'rb') as wav_file: 15 audio_player = p.open( 16 format=p.get_format_from_width(wav_file.getsampwidth()), 17 channels=wav_file.getnchannels(), 18 rate=wav_file.getframerate(), 19 output=True 20 ) 21 except ImportError: 22 print("Warning: pyaudio not installed. Audio playback disabled.", file=sys.stderr) 23 print("Install with: pip install pyaudio", file=sys.stderr) 24 25 try: 26 with wave.open(AUDIO_FILE, 'rb') as wav_file: 27 frames_per_chunk = int(SAMPLE_RATE * chunk_duration) 28 29 while not stop_event.is_set(): 30 frames = wav_file.readframes(frames_per_chunk) 31 if not frames: 32 break 33 34 if audio_player: 35 audio_player.write(frames) 36 else: 37 time.sleep(chunk_duration) 38 39 ws.send(frames, websocket.ABNF.OPCODE_BINARY) 40 finally: 41 if audio_player: 42 audio_player.stop_stream() 43 audio_player.close() 44 p.terminate() 45 46 # All audio sent - terminate the session 47 print("File streaming complete. Waiting for final transcripts...") 48 try: 49 ws.send(json.dumps({"type": "Terminate"})) 50 except Exception: 51 pass 52 53 global audio_thread 54 audio_thread = threading.Thread(target=stream_file) 55 audio_thread.daemon = True 56 audio_thread.start()
Handle WebSocket messages
1 def on_message(ws, message): 2 try: 3 data = json.loads(message) 4 msg_type = data.get('type') 5 6 if msg_type == "Begin": 7 session_data["session_id"] = data.get('id') 8 print(f"Session ID: {data.get('id')}\n") 9 elif msg_type == "Turn": 10 transcript = data.get('transcript', '') 11 if not transcript: 12 return 13 14 if data.get('end_of_turn'): 15 print(f"[Final]: {transcript}\n") 16 session_data["turns"].append(transcript) 17 else: 18 print(f"[Partial]: {transcript}") 19 elif msg_type == "Termination": 20 session_data["audio_duration_seconds"] = data.get('audio_duration_seconds', 0) 21 print(f"Session terminated: {data.get('audio_duration_seconds', 0)} seconds of audio processed") 22 except json.JSONDecodeError as e: 23 print(f"Error decoding message: {e}") 24 except Exception as e: 25 print(f"Error handling message: {e}")
WebSocket error and close handlers
1 def on_error(ws, error): 2 """Called when a WebSocket error occurs.""" 3 print(f"\nWebSocket Error: {error}") 4 stop_event.set() 5 6 7 def on_close(ws, close_status_code, close_msg): 8 """Called when the WebSocket connection is closed.""" 9 print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}") 10 stop_event.set() 11 12 if SAVE_TRANSCRIPT_TO_FILE: 13 save_transcript() 14 15 if audio_thread and audio_thread.is_alive(): 16 audio_thread.join(timeout=1.0)
Connect and stream the file
1 def run(): 2 global ws_app 3 4 # Validate audio file before connecting 5 validate_audio_file(AUDIO_FILE, SAMPLE_RATE) 6 7 # Create WebSocketApp 8 ws_app = websocket.WebSocketApp( 9 API_ENDPOINT, 10 header={"Authorization": ASSEMBLYAI_API_KEY}, 11 on_open=on_open, 12 on_message=on_message, 13 on_error=on_error, 14 on_close=on_close, 15 ) 16 17 # Run WebSocketApp in a separate thread 18 ws_thread = threading.Thread(target=ws_app.run_forever) 19 ws_thread.daemon = True 20 ws_thread.start() 21 22 try: 23 while ws_thread.is_alive(): 24 time.sleep(0.1) 25 except KeyboardInterrupt: 26 print("\nCtrl+C received. Stopping...") 27 stop_event.set() 28 29 if ws_app and ws_app.sock and ws_app.sock.connected: 30 try: 31 ws_app.send(json.dumps({"type": "Terminate"})) 32 time.sleep(2) 33 except Exception as e: 34 print(f"Error sending termination message: {e}") 35 36 if ws_app: 37 ws_app.close() 38 ws_thread.join(timeout=2.0) 39 40 except Exception as e: 41 print(f"\nAn unexpected error occurred: {e}") 42 stop_event.set() 43 if ws_app: 44 ws_app.close() 45 ws_thread.join(timeout=2.0) 46 47 finally: 48 print("Cleanup complete. Exiting.") 49 50 51 if __name__ == "__main__": 52 run()
The session will terminate once the file is finished streaming. If SAVE_TRANSCRIPT_TO_FILE is enabled (default), the transcript will be saved to {audio_filename}_{session_id}.txt in the current working directory.
The AUDIO_FILE path can be either relative (e.g., audio.wav) or absolute (e.g., /path/to/audio.wav).
Example output
Here’s an example of what the console output looks like when streaming an audio file:
1 WebSocket connection opened. 2 Connected to: wss://streaming.assemblyai.com/v3/ws?speech_model=u3-rt-pro&sample_rate=48000 3 4 Session ID: f37d7c4e-6be9-47ed-b6fc-7600fc78e34d 5 6 [Partial]: the 7 [Partial]: the quick 8 [Partial]: the quick brown 9 [Partial]: the quick brown fox 10 [Partial]: the quick brown fox jumps 11 [Partial]: the quick brown fox jumps over 12 [Partial]: the quick brown fox jumps over the 13 [Partial]: the quick brown fox jumps over the lazy 14 [Partial]: The quick brown fox jumps over the lazy dog 15 [Final]: The quick brown fox jumps over the lazy dog. 16 17 [Partial]: It 18 [Partial]: It is 19 [Partial]: It is a 20 [Partial]: It is a common 21 [Partial]: It is a common typing 22 [Partial]: It is a common typing test 23 [Final]: It is a common typing test. 24 25 File streaming complete. Waiting for final transcripts... 26 Session terminated: 7.52 seconds of audio processed 27 28 WebSocket Disconnected: Status=1000, Msg=None 29 Transcript saved to audio_f37d7c4e-6be9-47ed-b6fc-7600fc78e34d.txt 30 Cleanup complete. Exiting.
The output shows:
- Partial transcripts: Real-time updates as words are recognized
- Final: The complete turn with proper capitalization and punctuation