Speaker Diarization

Unlock the full power of your audio content with industry-leading speaker diarization. Identify speakers to create structured, speaker-labeled transcripts that bring clarity to even the most complex conversations.

Get started with fewer than 10 lines of code

Simply enable Speaker Diarization in our API, and receive a detailed transcript with a list of utterances.

See how in docs
import assemblyai as aai

# Sample script: transcribe a hosted audio file with speaker labels enabled,
# then print the full text followed by one line per utterance.
aai.settings.api_key = "YOUR_API_KEY"

transcriber = aai.Transcriber()

audio_url = (
    "https://assembly.ai/sports_injuries.mp3"
)

# speaker_labels=True is what enables Speaker Diarization for this request.
config = aai.TranscriptionConfig(speaker_labels=True)

transcript = transcriber.transcribe(audio_url, config)

# Stop with the service's own error message when the transcription failed,
# instead of failing later while iterating utterances that may be missing.
if transcript.status == aai.TranscriptStatus.error:
    raise SystemExit(f"Transcription failed: {transcript.error}")

print(transcript.text)

# Each utterance exposes the speaker label and the text spoken in that turn.
for utterance in transcript.utterances:
    print(f"Speaker {utterance.speaker}: {utterance.text}")
import { AssemblyAI } from 'assemblyai'

// Sample script: transcribe a hosted audio file with speaker labels enabled,
// then print the full text followed by one line per utterance.
const client = new AssemblyAI({
  apiKey: 'YOUR_API_KEY'
})

const audioUrl =
  'https://assembly.ai/sports_injuries.mp3'

// speaker_labels: true is what enables Speaker Diarization for this request.
const params = {
  audio: audioUrl,
  speaker_labels: true
}

/**
 * Requests the transcript and logs its text and speaker-labeled utterances.
 *
 * @returns {Promise<void>} resolves once everything has been logged
 * @throws {Error} when the returned transcript reports an error status
 */
const run = async () => {
  const transcript = await client.transcripts.transcribe(params)

  // Surface the service's error message rather than printing an empty result.
  if (transcript.status === 'error') {
    throw new Error(`Transcription failed: ${transcript.error}`)
  }

  console.log(transcript.text)

  // `?? []` replaces the TypeScript-only `!` assertion so this runs as plain
  // JavaScript and still tolerates a transcript without utterances.
  for (const utterance of transcript.utterances ?? []) {
    console.log(`Speaker ${utterance.speaker}: ${utterance.text}`)
  }
}

// Handle rejections explicitly instead of leaving the promise floating.
run().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
package main

import (
    "context"
    "fmt"
    "os"

    aai "github.com/AssemblyAI/assemblyai-go-sdk"
)

func main() {
    ctx := context.Background()

    audioURL := "https://assembly.ai/sports_injuries.mp3"

    client := aai.NewClient("YOUR_API_KEY")

    params := &aai.TranscriptOptionalParams{
        SpeakerLabels: aai.Bool(true),
    }

    transcript, err := client.Transcripts.TranscribeFromURL(ctx, audioURL, params)
    if err != nil {
        fmt.Println("Something bad happened:", err)
        os.Exit(1)
    }

    fmt.Println(*transcript.Text)

    for _, utterance := range transcript.Utterances {
        fmt.Printf("Speaker %v: %v
", *utterance.Speaker, *utterance.Text)
    }
}
import com.assemblyai.api.AssemblyAI;
import com.assemblyai.api.resources.transcripts.types.*;

/**
 * Sample program: transcribes a hosted audio file with speaker labels enabled,
 * then prints the full text followed by one line per utterance.
 */
public final class App {
    public static void main(String[] args) {
        AssemblyAI client = AssemblyAI.builder().apiKey("YOUR_API_KEY").build();

        String audioUrl = "https://assembly.ai/sports_injuries.mp3";

        // speakerLabels(true) is what enables Speaker Diarization for this request.
        var params = TranscriptOptionalParams.builder().speakerLabels(true).build();

        Transcript transcript = client.transcripts().transcribe(audioUrl, params);

        System.out.println(transcript.getText().get());

        // Print every utterance prefixed with the speaker it is attributed to.
        for (var utterance : transcript.getUtterances().get()) {
            String line = "Speaker " + utterance.getSpeaker() + ": " + utterance.getText();
            System.out.println(line);
        }
    }
}
require 'assemblyai'

# Sample script: transcribe a hosted audio file with speaker labels enabled,
# then print the full text followed by one line per utterance.
client = AssemblyAI::Client.new(api_key: 'YOUR_API_KEY')

audio_url = 'https://assembly.ai/sports_injuries.mp3'

# speaker_labels: true is what enables Speaker Diarization for this request.
transcript = client.transcripts.transcribe(
  audio_url: audio_url,
  speaker_labels: true
)

# Exit with the service's error message when the transcription failed.
abort transcript.error if transcript.status == AssemblyAI::Transcripts::TranscriptStatus::ERROR

puts transcript.text

transcript.utterances.each do |utterance|
  # printf does not append a newline, so every utterance used to run together
  # on one line; puts + format terminates each line.
  puts format('Speaker %<speaker>s: %<text>s', speaker: utterance.speaker, text: utterance.text)
end

Improve transcription quality and readability

Reduce speaker misattribution and transcription errors, enabling cleaner data for NLP tasks and enhancing user experience in speech-to-text applications.

See benchmarks and latest improvements

Make every voice count

Improve the readability of your transcriptions
Unlock call center insights
Create searchable, structured transcripts
Assess communication patterns
Optimize short-form content generation
Analyze agent vs. customer behavior
Reliable summarization and LLM analysis
Separate speakers to measure talk time

Unlock the value of voice data

Build what’s next on the platform powering thousands of the industry’s leading Voice AI apps.