Medical scribe - AssemblyAI

Transcribe a clinical encounter using Medical Mode with speaker labels and entity detection, identify speakers by role, then use LLM Gateway to generate a structured SOAP note.

Products used: Pre-recorded STT + Medical Mode + speaker diarization + Speaker Identification + entity detection + LLM Gateway.

Model selection: Uses universal-3-pro with "domain": "medical-v1" for purpose-built accuracy on medical terminology, drug names, and clinical language.

Python
Python SDK
JavaScript
JavaScript SDK

import requests
import time

# End-to-end medical scribe pipeline using plain HTTP requests:
#   1. submit the audio for transcription and poll until it finishes,
#   2. relabel diarized speakers as "Provider" / "Patient",
#   3. summarize the detected entities,
#   4. format the role-labelled utterances,
#   5. ask the LLM Gateway for a SOAP note and print it.

# ── Config ────────────────────────────────────────────────────
base_url = "https://api.assemblyai.com"
headers = {"authorization": "YOUR_API_KEY"}

audio_url = "https://assembly.ai/wildfires.mp3"  # Replace with your clinical audio

# ── Step 1: Transcribe with Medical Mode + speaker labels + entity detection ──
data = {
    "audio_url": audio_url,
    "speech_models": ["universal-3-pro"],
    "domain": "medical-v1",
    "speaker_labels": True,
    "entity_detection": True,
}

response = requests.post(base_url + "/v2/transcript", headers=headers, json=data)
response.raise_for_status()
transcript_id = response.json()["id"]

# Poll the transcript every 3 seconds until it reaches a terminal status.
polling_url = f"{base_url}/v2/transcript/{transcript_id}"
while True:
    poll_response = requests.get(polling_url, headers=headers)
    # Surface HTTP failures (e.g. bad API key, rate limiting, server errors)
    # directly; an error body may not carry a "status" key, which would
    # otherwise show up as a confusing KeyError below.
    poll_response.raise_for_status()
    result = poll_response.json()
    if result["status"] == "completed":
        break
    elif result["status"] == "error":
        raise RuntimeError(f"Transcription failed: {result['error']}")
    time.sleep(3)

# ── Step 2: Identify speakers by role ──
understanding_response = requests.post(
    "https://llm-gateway.assemblyai.com/v1/understanding",
    headers=headers,
    json={
        "transcript_id": transcript_id,
        "speech_understanding": {
            "request": {
                "speaker_identification": {
                    "speaker_type": "role",
                    "known_values": ["Provider", "Patient"],
                }
            }
        },
    },
)
understanding_response.raise_for_status()
identified = understanding_response.json()

# ── Step 3: Extract detected entities ──
# Entities come from the completed transcript; default to an empty list
# when the key is absent so the summary is simply an empty string.
entities = result.get("entities", [])
entity_summary = "\n".join(
    f"- {e['entity_type']}: {e['text']}" for e in entities
)

# ── Step 4: Format identified transcript ──
# One "<speaker>: <text>" line per utterance returned by Step 2.
speaker_transcript = "\n".join(
    f"{u['speaker']}: {u['text']}" for u in identified["utterances"]
)

# ── Step 5: Generate SOAP note via LLM Gateway ──
llm_response = requests.post(
    "https://llm-gateway.assemblyai.com/v1/chat/completions",
    headers=headers,
    json={
        "model": "claude-sonnet-4-5-20250929",
        "messages": [
            {
                "role": "user",
                "content": (
                    "You are a medical scribe. Given the clinical encounter transcript and "
                    "detected entities below, generate a structured SOAP note.\n\n"
                    "Format the note with these sections:\n"
                    "- **Subjective**: Patient's reported symptoms and history\n"
                    "- **Objective**: Clinical observations and measurements\n"
                    "- **Assessment**: Diagnosis or clinical impression\n"
                    "- **Plan**: Treatment plan, prescriptions, and follow-up\n\n"
                    f"Detected entities:\n{entity_summary}\n\n"
                    f"Transcript:\n{speaker_transcript}"
                ),
            }
        ],
        "max_tokens": 2000,
    },
)
llm_response.raise_for_status()

print("=== SOAP Note ===\n")
print(llm_response.json()["choices"][0]["message"]["content"])

import assemblyai as aai
import requests

# End-to-end medical scribe pipeline using the AssemblyAI Python SDK for
# transcription and plain HTTP requests for the LLM Gateway calls:
#   1. transcribe the audio (the SDK call returns once processing is done),
#   2. relabel diarized speakers as "Provider" / "Patient",
#   3. summarize the detected entities,
#   4. format the role-labelled utterances,
#   5. ask the LLM Gateway for a SOAP note and print it.

# ── Config ────────────────────────────────────────────────────
api_key = "YOUR_API_KEY"
aai.settings.api_key = api_key
headers = {"authorization": api_key}

audio_url = "https://assembly.ai/wildfires.mp3"  # Replace with your clinical audio

# ── Step 1: Transcribe with Medical Mode + speaker labels + entity detection ──
config = aai.TranscriptionConfig(
    speech_models=["universal-3-pro"],
    domain="medical-v1",
    speaker_labels=True,
    entity_detection=True,
)

transcript = aai.Transcriber().transcribe(audio_url, config)

if transcript.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")

# ── Step 2: Identify speakers by role ──
understanding_response = requests.post(
    "https://llm-gateway.assemblyai.com/v1/understanding",
    headers=headers,
    json={
        "transcript_id": transcript.id,
        "speech_understanding": {
            "request": {
                "speaker_identification": {
                    "speaker_type": "role",
                    "known_values": ["Provider", "Patient"],
                }
            }
        },
    },
)
understanding_response.raise_for_status()
identified = understanding_response.json()

# ── Step 3: Extract detected entities ──
# NOTE(review): the SDK appears to expose entity_type as an enum member
# rather than a plain string — confirm against the SDK types. Formatting an
# enum inside an f-string can render as "EntityType.x" depending on the
# Python version, so prefer the underlying value when one exists; this keeps
# the labels identical to the raw strings used in the REST example. Plain
# strings fall through unchanged.
entities = transcript.entities or []
entity_summary = "\n".join(
    f"- {getattr(e.entity_type, 'value', e.entity_type)}: {e.text}"
    for e in entities
)

# ── Step 4: Format identified transcript ──
# One "<speaker>: <text>" line per utterance returned by Step 2.
speaker_transcript = "\n".join(
    f"{u['speaker']}: {u['text']}" for u in identified["utterances"]
)

# ── Step 5: Generate SOAP note via LLM Gateway ──
llm_response = requests.post(
    "https://llm-gateway.assemblyai.com/v1/chat/completions",
    headers=headers,
    json={
        "model": "claude-sonnet-4-5-20250929",
        "messages": [
            {
                "role": "user",
                "content": (
                    "You are a medical scribe. Given the clinical encounter transcript and "
                    "detected entities below, generate a structured SOAP note.\n\n"
                    "Format the note with these sections:\n"
                    "- **Subjective**: Patient's reported symptoms and history\n"
                    "- **Objective**: Clinical observations and measurements\n"
                    "- **Assessment**: Diagnosis or clinical impression\n"
                    "- **Plan**: Treatment plan, prescriptions, and follow-up\n\n"
                    f"Detected entities:\n{entity_summary}\n\n"
                    f"Transcript:\n{speaker_transcript}"
                ),
            }
        ],
        "max_tokens": 2000,
    },
)
llm_response.raise_for_status()

print("=== SOAP Note ===\n")
print(llm_response.json()["choices"][0]["message"]["content"])

// End-to-end medical scribe pipeline using fetch only:
//   1. submit the audio for transcription and poll until it finishes,
//   2. relabel diarized speakers as "Provider" / "Patient",
//   3. summarize the detected entities,
//   4. format the role-labelled utterances,
//   5. ask the LLM Gateway for a SOAP note and print it.
// Uses top-level await, so it is expected to run as an ES module.

const baseUrl = "https://api.assemblyai.com";
const headers = {
  authorization: "YOUR_API_KEY",
  "Content-Type": "application/json",
};

const audioUrl = "https://assembly.ai/wildfires.mp3"; // Replace with your clinical audio

// Step 1: Transcribe with Medical Mode + speaker labels + entity detection
let res = await fetch(`${baseUrl}/v2/transcript`, {
  method: "POST",
  headers,
  body: JSON.stringify({
    audio_url: audioUrl,
    speech_models: ["universal-3-pro"],
    domain: "medical-v1",
    speaker_labels: true,
    entity_detection: true,
  }),
});
if (!res.ok) throw new Error(`Error: ${res.status}`);
const { id: transcriptId } = await res.json();

// Poll the transcript every 3 seconds until it reaches a terminal status.
let result;
while (true) {
  res = await fetch(`${baseUrl}/v2/transcript/${transcriptId}`, { headers });
  // Check the HTTP status on every poll, like the other requests in this
  // script; otherwise a failed request whose body has no recognised
  // `status` would keep the loop spinning forever.
  if (!res.ok) throw new Error(`Error: ${res.status}`);
  result = await res.json();
  if (result.status === "completed") break;
  if (result.status === "error")
    throw new Error(`Transcription failed: ${result.error}`);
  await new Promise((r) => setTimeout(r, 3000));
}

// Step 2: Identify speakers by role
const understandingRes = await fetch(
  "https://llm-gateway.assemblyai.com/v1/understanding",
  {
    method: "POST",
    headers,
    body: JSON.stringify({
      transcript_id: transcriptId,
      speech_understanding: {
        request: {
          speaker_identification: {
            speaker_type: "role",
            known_values: ["Provider", "Patient"],
          },
        },
      },
    }),
  }
);
if (!understandingRes.ok) throw new Error(`Error: ${understandingRes.status}`);
const identified = await understandingRes.json();

// Step 3: Extract detected entities
// Falls back to an empty list so the summary is an empty string when the
// completed transcript carries no entities.
const entities = result.entities || [];
const entitySummary = entities
  .map((e) => `- ${e.entity_type}: ${e.text}`)
  .join("\n");

// Step 4: Format identified transcript
// One "<speaker>: <text>" line per utterance returned by Step 2.
const speakerTranscript = identified.utterances
  .map((u) => `${u.speaker}: ${u.text}`)
  .join("\n");

// Step 5: Generate SOAP note via LLM Gateway
res = await fetch("https://llm-gateway.assemblyai.com/v1/chat/completions", {
  method: "POST",
  headers,
  body: JSON.stringify({
    model: "claude-sonnet-4-5-20250929",
    messages: [
      {
        role: "user",
        content:
          "You are a medical scribe. Given the clinical encounter transcript and " +
          "detected entities below, generate a structured SOAP note.\n\n" +
          "Format the note with these sections:\n" +
          "- **Subjective**: Patient's reported symptoms and history\n" +
          "- **Objective**: Clinical observations and measurements\n" +
          "- **Assessment**: Diagnosis or clinical impression\n" +
          "- **Plan**: Treatment plan, prescriptions, and follow-up\n\n" +
          `Detected entities:\n${entitySummary}\n\n` +
          `Transcript:\n${speakerTranscript}`,
      },
    ],
    max_tokens: 2000,
  }),
});
if (!res.ok) throw new Error(`Error: ${res.status}`);
const llmResult = await res.json();

console.log("=== SOAP Note ===\n");
console.log(llmResult.choices[0].message.content);

import { AssemblyAI } from "assemblyai";

// Medical scribe pipeline using the AssemblyAI JavaScript SDK for
// transcription and fetch for the two LLM Gateway calls. Uses top-level
// await, so it is expected to run as an ES module.

// ── Config ────────────────────────────────────────────────────
const apiKey = "YOUR_API_KEY";
const client = new AssemblyAI({ apiKey });
const headers = { authorization: apiKey, "Content-Type": "application/json" };

const audioUrl = "https://assembly.ai/wildfires.mp3"; // Replace with your clinical audio
const gatewayUrl = "https://llm-gateway.assemblyai.com/v1";

// Step 1: Transcribe with Medical Mode + speaker labels + entity detection
const transcriptionParams = {
  audio: audioUrl,
  speech_models: ["universal-3-pro"],
  domain: "medical-v1",
  speaker_labels: true,
  entity_detection: true,
};
const transcript = await client.transcripts.transcribe(transcriptionParams);

if (transcript.error) {
  throw new Error(`Transcription failed: ${transcript.error}`);
}

// Step 2: Identify speakers by role
const roleRequest = {
  transcript_id: transcript.id,
  speech_understanding: {
    request: {
      speaker_identification: {
        speaker_type: "role",
        known_values: ["Provider", "Patient"],
      },
    },
  },
};
const roleResponse = await fetch(`${gatewayUrl}/understanding`, {
  method: "POST",
  headers,
  body: JSON.stringify(roleRequest),
});
if (!roleResponse.ok) {
  throw new Error(`Error: ${roleResponse.status}`);
}
const labelled = await roleResponse.json();

// Step 3: Extract detected entities (one "- <type>: <text>" bullet each)
const toEntityBullet = (entity) => `- ${entity.entity_type}: ${entity.text}`;
const entitySummary = (transcript.entities || []).map(toEntityBullet).join("\n");

// Step 4: Format identified transcript (one "<speaker>: <text>" line each)
const toSpeakerLine = (utterance) => `${utterance.speaker}: ${utterance.text}`;
const speakerTranscript = labelled.utterances.map(toSpeakerLine).join("\n");

// Step 5: Generate SOAP note via LLM Gateway
const prompt = [
  "You are a medical scribe. Given the clinical encounter transcript and ",
  "detected entities below, generate a structured SOAP note.\n\n",
  "Format the note with these sections:\n",
  "- **Subjective**: Patient's reported symptoms and history\n",
  "- **Objective**: Clinical observations and measurements\n",
  "- **Assessment**: Diagnosis or clinical impression\n",
  "- **Plan**: Treatment plan, prescriptions, and follow-up\n\n",
  `Detected entities:\n${entitySummary}\n\n`,
  `Transcript:\n${speakerTranscript}`,
].join("");

const completionRequest = {
  model: "claude-sonnet-4-5-20250929",
  messages: [{ role: "user", content: prompt }],
  max_tokens: 2000,
};
const completionResponse = await fetch(`${gatewayUrl}/chat/completions`, {
  method: "POST",
  headers,
  body: JSON.stringify(completionRequest),
});
if (!completionResponse.ok) {
  throw new Error(`Error: ${completionResponse.status}`);
}
const completion = await completionResponse.json();

console.log("=== SOAP Note ===\n");
console.log(completion.choices[0].message.content);

Example output

=== SOAP Note ===

## Subjective
Patient reports exposure to wildfire smoke over the past several days. Describes
worsening cough, shortness of breath, and eye irritation. Symptoms began
approximately 3 days ago coinciding with elevated air quality alerts in the region.

## Objective
- AQI reading: 150 micrograms per cubic meter (10x annual average)
- Particulate matter levels classified as "unhealthy"
- Patient appears alert and oriented

## Assessment
Acute respiratory irritation secondary to wildfire smoke exposure.
Environmental exposure consistent with regional air quality emergency.

## Plan
1. Advise patient to remain indoors with windows closed
2. Recommend N95 mask for any necessary outdoor activity
3. Prescribe albuterol inhaler PRN for acute bronchospasm
4. Follow up in 1 week or sooner if symptoms worsen
5. Refer to pulmonology if symptoms persist beyond 2 weeks

For more on building clinical documentation apps, see the Medical Scribe guides.

See the End-to-end examples overview for all available pipelines.

Documentation Index