Stream audio from your microphone with speaker diarization and LLM Gateway to get live transcription and automatic summaries after each speaker turn.
Products used: Streaming STT + Universal-3 Pro + LLM Gateway
Model selection: Uses u3-rt-pro (Universal-3 Pro Streaming) for the lowest latency (~300ms) with the highest streaming accuracy.
Documentation Index: Fetch the complete documentation index at: https://assemblyai.com/docs/llms.txt
Use this file to discover all available pages before exploring further.
- Python
- JavaScript
# pip install pyaudio websocket-client
import pyaudio
import websocket
import json
import threading
import time
from urllib.parse import urlencode
# ── Config ────────────────────────────────────────────────────
YOUR_API_KEY = "YOUR_API_KEY"

# Audio settings. Declared before CONNECTION_PARAMS so the sample rate that is
# announced in the connection URL and the rate the microphone is opened with
# come from a single value and cannot drift apart.
SAMPLE_RATE = 16000
FRAMES_PER_BUFFER = 800  # 800 frames at 16 kHz = 50 ms of audio per chunk

# Prompt template forwarded to LLM Gateway. "{{turn}}" is sent literally
# (this is a plain string, not an f-string); presumably the service replaces
# it with the finished turn's transcript — confirm against the API docs.
PROMPT = (
    "Summarize this speaker turn in one sentence, then list any "
    "action items mentioned.\n\nTranscript: {{turn}}"
)

# Chat-completion style request body; serialized to JSON below and passed as
# the "llm_gateway" query parameter.
LLM_GATEWAY_CONFIG = {
    "model": "claude-sonnet-4-5-20250929",
    "messages": [{"role": "user", "content": PROMPT}],
    "max_tokens": 500,
}

# Query-string parameters of the streaming WebSocket URL.
CONNECTION_PARAMS = {
    "sample_rate": SAMPLE_RATE,
    "speech_model": "u3-rt-pro",
    "format_turns": True,
    "min_turn_silence": 560,  # Wait longer for natural meeting pauses
    "max_turn_silence": 2000,
    "llm_gateway": json.dumps(LLM_GATEWAY_CONFIG),
}
API_ENDPOINT = f"wss://streaming.assemblyai.com/v3/ws?{urlencode(CONNECTION_PARAMS)}"

# Set when the session is ending; the audio capture loop polls it to stop.
stop_event = threading.Event()
def on_open(ws):
    """WebSocket "open" callback: start streaming microphone audio.

    Spawns a daemon thread that reads 16-bit mono PCM chunks from the default
    input device and sends each chunk to ``ws`` as a binary frame until
    ``stop_event`` is set or reading/sending fails.

    Args:
        ws: The connected WebSocketApp instance passed in by websocket-client.
    """
    print("Connected — speak into your microphone. Press Ctrl+C to stop.\n")

    def stream_audio():
        audio = pyaudio.PyAudio()
        stream = None
        try:
            stream = audio.open(
                input=True, frames_per_buffer=FRAMES_PER_BUFFER,
                channels=1, format=pyaudio.paInt16, rate=SAMPLE_RATE,
            )
            while not stop_event.is_set():
                try:
                    data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                    ws.send(data, websocket.ABNF.OPCODE_BINARY)
                except Exception as exc:
                    # A failure while shutting down (socket already closed) is
                    # expected; anything else is reported instead of being
                    # dropped silently.
                    if not stop_event.is_set():
                        print(f"Audio streaming stopped: {exc}")
                    break
        finally:
            # Release the device even when opening the stream itself failed.
            if stream is not None:
                stream.stop_stream()
                stream.close()
            audio.terminate()

    threading.Thread(target=stream_audio, daemon=True).start()
def on_message(ws, message):
    """WebSocket "message" callback: print transcripts and LLM responses.

    Handles three message types, selected by the JSON "type" field:

    * "Turn": prints the final transcript when "end_of_turn" is truthy,
      otherwise overwrites the current line with the last 80 characters of
      the partial transcript. Empty transcripts print nothing.
    * "LLMGatewayResponse": prints the first choice's message content, or an
      empty string when any level of the payload is missing, null or empty.
    * "Termination": prints the processed audio duration.

    Any other type is ignored.

    Args:
        ws: The WebSocketApp instance (unused).
        message: JSON-encoded text frame received from the server.

    Raises:
        json.JSONDecodeError: If ``message`` is not valid JSON.
    """
    data = json.loads(message)
    msg_type = data.get("type")
    if msg_type == "Turn":
        transcript = data.get("transcript", "")
        if data.get("end_of_turn") and transcript:
            print(f"[Turn] {transcript}\n")
        elif transcript:
            print(f"\r ... {transcript[-80:]}", end="", flush=True)
    elif msg_type == "LLMGatewayResponse":
        # Each level falls back with `or` so that a null value or an empty
        # "choices" list yields "" instead of raising IndexError/AttributeError.
        payload = data.get("data") or {}
        choices = payload.get("choices") or [{}]
        first_choice = choices[0] or {}
        content = (first_choice.get("message") or {}).get("content") or ""
        print(f"[Assistant] {content}\n")
    elif msg_type == "Termination":
        print(f"\nSession ended — {data.get('audio_duration_seconds', 0)}s of audio processed.")
def on_error(ws, error):
    """WebSocket "error" callback: report the error and signal shutdown.

    Args:
        ws: The WebSocketApp instance (unused).
        error: The error object or message supplied by websocket-client.
    """
    report = f"Error: {error}"
    print(report)
    # Setting the event makes the audio capture loop exit.
    stop_event.set()
def on_close(ws, code, msg):
    """WebSocket "close" callback: signal the audio capture loop to stop.

    Args:
        ws: The WebSocketApp instance (unused).
        code: Close status code (unused).
        msg: Close reason (unused).
    """
    stop_event.set()
# Create the WebSocket client and run its event loop on a background thread so
# the main thread stays free to react to Ctrl+C.
ws_app = websocket.WebSocketApp(
    API_ENDPOINT,
    header={"Authorization": YOUR_API_KEY},
    on_open=on_open, on_message=on_message,
    on_error=on_error, on_close=on_close,
)
ws_thread = threading.Thread(target=ws_app.run_forever, daemon=True)
ws_thread.start()
try:
    # Poll instead of a bare join() so KeyboardInterrupt is handled promptly.
    while ws_thread.is_alive():
        time.sleep(0.1)
except KeyboardInterrupt:
    print("\nStopping...")
    stop_event.set()
    if ws_app.sock and ws_app.sock.connected:
        ws_app.send(json.dumps({"type": "Terminate"}))
        # Wait for the socket thread to finish (the session closing) for at
        # most 2 s, rather than always sleeping the full 2 s.
        ws_thread.join(timeout=2)
    ws_app.close()
// npm install ws mic
const WebSocket = require("ws");
const mic = require("mic");
const querystring = require("querystring");
// ── Config ────────────────────────────────────────────────────
const YOUR_API_KEY = "YOUR_API_KEY";

// Prompt template forwarded to LLM Gateway; "{{turn}}" is sent literally.
const PROMPT = `Summarize this speaker turn in one sentence, then list any action items mentioned.\n\nTranscript: {{turn}}`;

// Request body that is JSON-encoded into the "llm_gateway" query parameter.
const LLM_GATEWAY_CONFIG = {
  model: "claude-sonnet-4-5-20250929",
  messages: [{ role: "user", content: PROMPT }],
  max_tokens: 500,
};

// Query-string parameters of the streaming WebSocket URL.
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  speech_model: "u3-rt-pro",
  format_turns: true,
  min_turn_silence: 560,
  max_turn_silence: 2000,
  llm_gateway: JSON.stringify(LLM_GATEWAY_CONFIG),
};

const API_ENDPOINT = [
  "wss://streaming.assemblyai.com/v3/ws",
  querystring.stringify(CONNECTION_PARAMS),
].join("?");
// Connect and stream
const ws = new WebSocket(API_ENDPOINT, {
  headers: { Authorization: YOUR_API_KEY },
});

// Assigned once the socket opens; the other handlers check it before stopping.
let micInstance;

// Once connected, capture mono audio at 16000 Hz and forward every chunk to
// the socket as it arrives.
ws.on("open", () => {
  console.log("Connected — speak into your microphone. Press Ctrl+C to stop.\n");
  micInstance = mic({ rate: "16000", channels: "1", debug: false });
  const micStream = micInstance.getAudioStream();
  micStream.on("data", (data) => {
    // Drop chunks that arrive while the socket is closing or closed.
    if (ws.readyState === WebSocket.OPEN) ws.send(data);
  });
  // An "error" event with no listener is thrown as an uncaught exception, so
  // report capture failures instead of crashing the process.
  micStream.on("error", (err) => console.error(`Microphone error: ${err}`));
  micInstance.start();
});
// Dispatch incoming server messages on their "type" field: print final and
// partial transcripts, LLM Gateway responses, and the session summary.
ws.on("message", (message) => {
  const event = JSON.parse(message);

  switch (event.type) {
    case "Turn": {
      const transcript = event.transcript || "";
      if (event.end_of_turn && transcript) {
        console.log(`[Turn] ${transcript}\n`);
        break;
      }
      if (transcript) {
        // Partial result: rewrite the current line with the last 80 characters.
        process.stdout.write(`\r ... ${transcript.slice(-80)}`);
      }
      break;
    }
    case "LLMGatewayResponse": {
      const firstChoice = event.data?.choices?.[0];
      const content = firstChoice?.message?.content || "";
      console.log(`[Assistant] ${content}\n`);
      break;
    }
    case "Termination": {
      const seconds = event.audio_duration_seconds || 0;
      console.log(`\nSession ended — ${seconds}s of audio processed.`);
      break;
    }
    default:
      // Other message types are ignored.
      break;
  }
});
// Report socket errors on stderr.
ws.on("error", (err) => {
  console.error(`Error: ${err}`);
});

// When the socket closes, stop the microphone (if it was ever started).
ws.on("close", () => {
  micInstance?.stop();
  console.log("Disconnected.");
});
// Ctrl+C: stop capturing, ask the server to end the session, then exit after a
// short grace period so the final messages can still be printed.
process.on("SIGINT", () => {
  console.log("\nStopping...");
  // Stop the microphone first so no audio is sent after the Terminate request.
  if (micInstance) micInstance.stop();
  if (ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify({ type: "Terminate" }));
  }
  // Allow 2 s for the server's remaining messages before closing and exiting.
  setTimeout(() => {
    ws.close();
    process.exit(0);
  }, 2000);
});
Example output
Connected — speak into your microphone. Press Ctrl+C to stop.
[Turn] So the main thing we need to decide today is whether we're going
with vendor A or vendor B for the new analytics platform.
[Assistant] The speaker is initiating a decision discussion about choosing
between two analytics platform vendors.
Action items: None yet — decision pending.
[Turn] I think vendor A has better pricing but vendor B has the integrations
we need. Can someone pull the comparison spreadsheet by Friday?
[Assistant] The speaker compared vendor pricing vs. integrations and requested
a comparison document.
Action items:
- Pull the vendor comparison spreadsheet by Friday
See the End-to-end examples overview for all available pipelines.