Transcribe a sales call with speaker labels and sentiment analysis, identify speakers by role, then use LLM Gateway to generate a coaching scorecard with talk/listen ratio and sentiment insights.
Products used: Pre-recorded STT + speaker diarization + Speaker Identification + sentiment analysis + LLM Gateway
Documentation Index
Fetch the complete documentation index at: https://assemblyai.com/docs/llms.txt
Use this file to discover all available pages before exploring further.
Model selection: Uses universal-3-pro for the highest English accuracy. For multilingual sales teams, add universal-2 as a fallback.
- Python
- JavaScript
import requests
import time
from collections import Counter
# ── Config ────────────────────────────────────────────────────
base_url = "https://api.assemblyai.com"
headers = {"authorization": "YOUR_API_KEY"}
audio_url = "https://assembly.ai/wildfires.mp3"

# ── Step 1: Transcribe with speaker labels + sentiment analysis ──
# Submit the audio URL with speaker labels and sentiment analysis enabled.
data = {
    "audio_url": audio_url,
    "speech_models": ["universal-3-pro"],
    "speaker_labels": True,
    "sentiment_analysis": True,
}
response = requests.post(base_url + "/v2/transcript", headers=headers, json=data)
response.raise_for_status()
transcript_id = response.json()["id"]

# Poll every 3 seconds until the transcript is completed or has failed.
while True:
    poll_response = requests.get(
        f"{base_url}/v2/transcript/{transcript_id}", headers=headers
    )
    # Surface HTTP failures here; otherwise an error body that lacks a
    # "status" field would show up as an unrelated KeyError below.
    poll_response.raise_for_status()
    result = poll_response.json()
    if result["status"] == "completed":
        break
    if result["status"] == "error":
        raise RuntimeError(f"Transcription failed: {result['error']}")
    time.sleep(3)
# ── Step 2: Identify speakers by role ──
# Send the transcript ID together with a role-based speaker_identification
# request that lists the two known roles.
speaker_identification_request = {
    "speaker_type": "role",
    "known_values": ["Sales Rep", "Customer"],
}
understanding_payload = {
    "transcript_id": transcript_id,
    "speech_understanding": {
        "request": {"speaker_identification": speaker_identification_request},
    },
}
understanding_response = requests.post(
    "https://llm-gateway.assemblyai.com/v1/understanding",
    headers=headers,
    json=understanding_payload,
)
understanding_response.raise_for_status()
identified = understanding_response.json()
# ── Step 3: Calculate talk/listen ratio per speaker ──
# Accumulate each speaker's total utterance duration (end - start).
speaker_durations = Counter()
for utterance in identified["utterances"]:
    speaker_durations[utterance["speaker"]] += utterance["end"] - utterance["start"]
total_ms = sum(speaker_durations.values())
# Each speaker's share of the total, as a percentage rounded to one decimal.
# If the summed duration is 0 (e.g. only zero-length utterances), report 0.0
# instead of raising ZeroDivisionError.
talk_ratios = {
    speaker: round(dur / total_ms * 100, 1) if total_ms else 0.0
    for speaker, dur in speaker_durations.items()
}
# ── Step 4: Summarize sentiment shifts ──
# `result` was fetched before Step 2, so the speaker labels on its sentiment
# results are the pre-identification labels, not the roles used for the talk
# ratios and the transcript. Translate them so the prompt uses one set of names.
# NOTE(review): assumes the understanding response exposes a label -> role
# mapping at speech_understanding.response.speaker_identification.mapping —
# confirm against the API reference. If it is absent, labels are used as-is.
speaker_identification = (
    (identified.get("speech_understanding") or {}).get("response") or {}
).get("speaker_identification") or {}
role_by_label = speaker_identification.get("mapping") or {}

sentiment_by_speaker = {}
for s in result["sentiment_analysis_results"]:
    # `or` also covers an explicit null speaker, which .get()'s default would not.
    label = s.get("speaker") or "Unknown"
    speaker = role_by_label.get(label, label)
    sentiment_by_speaker.setdefault(speaker, []).append(s["sentiment"])

# One line per speaker with positive / neutral / negative counts.
sentiment_summary = ""
for speaker, sentiments in sentiment_by_speaker.items():
    counts = Counter(sentiments)
    sentiment_summary += (
        f"{speaker}: "
        f"{counts.get('POSITIVE', 0)} positive, "
        f"{counts.get('NEUTRAL', 0)} neutral, "
        f"{counts.get('NEGATIVE', 0)} negative\n"
    )
# ── Step 5: Format transcript and generate coaching scorecard ──
# Render the identified utterances as "speaker: text" lines.
transcript_lines = [f"{u['speaker']}: {u['text']}" for u in identified["utterances"]]
speaker_transcript = "\n".join(transcript_lines)

# Prompt layout: instructions, computed metrics, transcript, then the
# sections the scorecard should contain.
prompt_parts = [
    "You are a sales coaching assistant. Analyze this sales call and produce a scorecard.\n\n",
    f"Talk/listen ratios: {talk_ratios}\n\n",
    f"Sentiment breakdown:\n{sentiment_summary}\n",
    f"Transcript:\n{speaker_transcript}\n\n",
    "Produce:\n",
    "1. Call summary (2-3 sentences)\n",
    "2. Talk/listen ratio analysis (ideal is 40/60 for the rep)\n",
    "3. Customer sentiment shifts and what caused them\n",
    "4. Top 3 coaching suggestions for the sales rep",
]
chat_payload = {
    "model": "claude-sonnet-4-5-20250929",
    "messages": [{"role": "user", "content": "".join(prompt_parts)}],
    "max_tokens": 2000,
}
llm_response = requests.post(
    "https://llm-gateway.assemblyai.com/v1/chat/completions",
    headers=headers,
    json=chat_payload,
)
llm_response.raise_for_status()

print("=== Sales Call Scorecard ===\n")
# The scorecard text is the content of the first choice's message.
first_choice = llm_response.json()["choices"][0]
print(first_choice["message"]["content"])
const baseUrl = "https://api.assemblyai.com";
const headers = {
  authorization: "YOUR_API_KEY",
  "Content-Type": "application/json",
};
const audioUrl = "https://assembly.ai/wildfires.mp3";

// Step 1: Transcribe with speaker labels + sentiment analysis
let res = await fetch(`${baseUrl}/v2/transcript`, {
  method: "POST",
  headers,
  body: JSON.stringify({
    audio_url: audioUrl,
    speech_models: ["universal-3-pro"],
    speaker_labels: true,
    sentiment_analysis: true,
  }),
});
if (!res.ok) throw new Error(`Error: ${res.status}`);
const { id: transcriptId } = await res.json();

// Poll every 3 seconds until the transcript is completed or has failed.
let result;
while (true) {
  res = await fetch(`${baseUrl}/v2/transcript/${transcriptId}`, { headers });
  // A failed poll whose body carries neither a "completed" nor an "error"
  // status would match no exit condition below and loop forever, so fail fast.
  if (!res.ok) throw new Error(`Error: ${res.status}`);
  result = await res.json();
  if (result.status === "completed") break;
  if (result.status === "error") {
    throw new Error(`Transcription failed: ${result.error}`);
  }
  await new Promise((resolve) => setTimeout(resolve, 3000));
}
// Step 2: Identify speakers by role
// Send the transcript ID together with a role-based speaker_identification
// request that lists the two known roles.
const understandingPayload = {
  transcript_id: transcriptId,
  speech_understanding: {
    request: {
      speaker_identification: {
        speaker_type: "role",
        known_values: ["Sales Rep", "Customer"],
      },
    },
  },
};
const understandingRes = await fetch(
  "https://llm-gateway.assemblyai.com/v1/understanding",
  { method: "POST", headers, body: JSON.stringify(understandingPayload) }
);
if (!understandingRes.ok) {
  throw new Error(`Error: ${understandingRes.status}`);
}
const identified = await understandingRes.json();
// Step 3: Calculate talk/listen ratio per speaker
// Accumulate each speaker's total utterance duration (end - start).
const speakerDurations = {};
for (const { speaker, start, end } of identified.utterances) {
  speakerDurations[speaker] = (speakerDurations[speaker] ?? 0) + (end - start);
}
const totalMs = Object.values(speakerDurations).reduce((sum, ms) => sum + ms, 0);

// Each speaker's share of the total as a "NN.N%" string. If the summed
// duration is 0, report "0.0%" instead of the "NaN%" that 0 / 0 would produce.
const talkRatios = {};
for (const [speaker, dur] of Object.entries(speakerDurations)) {
  const share = totalMs > 0 ? (dur / totalMs) * 100 : 0;
  talkRatios[speaker] = `${share.toFixed(1)}%`;
}
// Step 4: Summarize sentiment shifts
// `result` was fetched before Step 2, so the speaker labels on its sentiment
// results are the pre-identification labels, not the roles used for the talk
// ratios and the transcript. Translate them so the prompt uses one set of names.
// NOTE(review): assumes the understanding response exposes a label -> role
// mapping at speech_understanding.response.speaker_identification.mapping —
// confirm against the API reference. If it is absent, labels are used as-is.
const roleByLabel =
  identified.speech_understanding?.response?.speaker_identification?.mapping ??
  {};

const sentimentBySpeaker = {};
for (const s of result.sentiment_analysis_results) {
  const label = s.speaker || "Unknown";
  const speaker = Object.hasOwn(roleByLabel, label) ? roleByLabel[label] : label;
  if (!sentimentBySpeaker[speaker]) sentimentBySpeaker[speaker] = [];
  sentimentBySpeaker[speaker].push(s.sentiment);
}

// One line per speaker with positive / neutral / negative counts.
let sentimentSummary = "";
for (const [speaker, sentiments] of Object.entries(sentimentBySpeaker)) {
  const counts = { POSITIVE: 0, NEUTRAL: 0, NEGATIVE: 0 };
  for (const sentiment of sentiments) {
    // Ignore unexpected labels rather than adding NaN-valued keys to counts.
    if (Object.hasOwn(counts, sentiment)) counts[sentiment] += 1;
  }
  sentimentSummary += `${speaker}: ${counts.POSITIVE} positive, ${counts.NEUTRAL} neutral, ${counts.NEGATIVE} negative\n`;
}
// Step 5: Format transcript and generate coaching scorecard
// Render the identified utterances as "speaker: text" lines.
const transcriptLines = [];
for (const { speaker, text } of identified.utterances) {
  transcriptLines.push(`${speaker}: ${text}`);
}
const speakerTranscript = transcriptLines.join("\n");

// Prompt layout: instructions, computed metrics, transcript, then the
// sections the scorecard should contain.
const prompt = [
  "You are a sales coaching assistant. Analyze this sales call and produce a scorecard.\n\n",
  `Talk/listen ratios: ${JSON.stringify(talkRatios)}\n\n`,
  `Sentiment breakdown:\n${sentimentSummary}\n`,
  `Transcript:\n${speakerTranscript}\n\n`,
  "Produce:\n",
  "1. Call summary (2-3 sentences)\n",
  "2. Talk/listen ratio analysis (ideal is 40/60 for the rep)\n",
  "3. Customer sentiment shifts and what caused them\n",
  "4. Top 3 coaching suggestions for the sales rep",
].join("");

const chatPayload = {
  model: "claude-sonnet-4-5-20250929",
  messages: [{ role: "user", content: prompt }],
  max_tokens: 2000,
};
res = await fetch("https://llm-gateway.assemblyai.com/v1/chat/completions", {
  method: "POST",
  headers,
  body: JSON.stringify(chatPayload),
});
if (!res.ok) {
  throw new Error(`Error: ${res.status}`);
}
const llmResult = await res.json();

console.log("=== Sales Call Scorecard ===\n");
// The scorecard text is the content of the first choice's message.
const [firstChoice] = llmResult.choices;
console.log(firstChoice.message.content);
Example output
=== Sales Call Scorecard ===
## Call summary
This call discussed the environmental and health impacts of wildfire smoke on
US communities. The speakers covered air quality data, health risks, and
recommended precautions for the public.
## Talk/listen ratio
Sales Rep: 65.3% | Customer: 34.7%
Analysis: The ratio is inverted from the ideal 40/60 split. The rep dominated
the conversation — focus on asking more open-ended questions.
## Customer sentiment shifts
- Started neutral during introductions
- Shifted negative when discussing health risks and poor air quality readings
- Returned to neutral during the action-planning portion
## Coaching suggestions
1. Ask more discovery questions early to understand the customer's specific concerns
2. When the customer expresses concern, acknowledge before pivoting to solutions
3. Summarize key points at the end and confirm next steps with clear ownership
See the End-to-end examples overview for all available pipelines.