Speaker Diarization

Unlock the full power of your audio content with industry-leading speaker diarization. Identify speakers to create structured, speaker-labeled transcripts that bring clarity to even the most complex conversations.

Use our API

Get started with fewer than 10 lines of code

Simply enable Speaker Diarization in our API, and receive a detailed transcript with a list of utterances.

See how in docs

import assemblyai as aai

# Authenticate every SDK call made by this script.
aai.settings.api_key = "YOUR_API_KEY"

transcriber = aai.Transcriber()

audio_url = (
    "https://assembly.ai/sports_injuries.mp3"
)

# speaker_labels=True enables Speaker Diarization, so the returned transcript
# carries a list of utterances in addition to the plain text.
config = aai.TranscriptionConfig(speaker_labels=True)

transcript = transcriber.transcribe(audio_url, config)

# Stop with the API's own error message when the job failed, instead of
# letting the utterance loop below fail on missing data.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")

print(transcript.text)

# One line per utterance, prefixed with the speaker label.
for utterance in transcript.utterances:
    print(f"Speaker {utterance.speaker}: {utterance.text}")

import { AssemblyAI } from 'assemblyai'

const client = new AssemblyAI({
  apiKey: 'YOUR_API_KEY'
})

const audioUrl =
  'https://assembly.ai/sports_injuries.mp3'

// speaker_labels enables Speaker Diarization for this request.
const params = {
  audio: audioUrl,
  speaker_labels: true
}

/**
 * Transcribes the sample audio, then prints the full text followed by one
 * line per utterance prefixed with its speaker label.
 *
 * @returns {Promise<void>} Resolves once everything has been printed.
 * @throws {Error} When the transcript comes back with status "error".
 */
const run = async () => {
  const transcript = await client.transcripts.transcribe(params)

  // Surface the API's error message instead of printing an empty result.
  if (transcript.status === 'error') {
    throw new Error(`Transcription failed: ${transcript.error}`)
  }

  console.log(transcript.text)

  // Fall back to an empty list rather than a TypeScript-only non-null
  // assertion, so the snippet is valid plain JavaScript as well.
  for (const utterance of transcript.utterances ?? []) {
    console.log(`Speaker ${utterance.speaker}: ${utterance.text}`)
  }
}

// Never leave the promise floating: report any failure from run().
run().catch((err) => {
  console.error(err)
})

package main

import (
    "context"
    "fmt"
    "os"

    aai "github.com/AssemblyAI/assemblyai-go-sdk"
)

func main() {
    ctx := context.Background()

    audioURL := "https://assembly.ai/sports_injuries.mp3"

    client := aai.NewClient("YOUR_API_KEY")

    params := &aai.TranscriptOptionalParams{
        SpeakerLabels: aai.Bool(true),
    }

    transcript, err := client.Transcripts.TranscribeFromURL(ctx, audioURL, params)
    if err != nil {
        fmt.Println("Something bad happened:", err)
        os.Exit(1)
    }

    fmt.Println(*transcript.Text)

    for _, utterance := range transcript.Utterances {
        fmt.Printf("Speaker %v: %v
", *utterance.Speaker, *utterance.Text)
    }
}

import com.assemblyai.api.AssemblyAI;
import com.assemblyai.api.resources.transcripts.types.*;

/**
 * Quick-start sample: transcribes a hosted audio file with speaker labels
 * enabled and prints the result to standard output.
 */
public final class App {
    /**
     * Builds the client, requests a transcript with speaker labels, then
     * prints the full text followed by one line per utterance.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        String sourceUrl = "https://assembly.ai/sports_injuries.mp3";

        AssemblyAI assemblyClient = AssemblyAI.builder()
                .apiKey("YOUR_API_KEY")
                .build();

        // speakerLabels(true) enables Speaker Diarization for this request.
        var options = TranscriptOptionalParams.builder()
                .speakerLabels(true)
                .build();

        Transcript result = assemblyClient.transcripts().transcribe(sourceUrl, options);

        System.out.println(result.getText().get());

        // Print each utterance on its own line, prefixed with its speaker.
        for (var utterance : result.getUtterances().get()) {
            String line = "Speaker " + utterance.getSpeaker() + ": " + utterance.getText();
            System.out.println(line);
        }
    }
}

require 'assemblyai'

client = AssemblyAI::Client.new(api_key: 'YOUR_API_KEY')

audio_url = 'https://assembly.ai/sports_injuries.mp3'

# speaker_labels: true enables Speaker Diarization for this request.
transcript = client.transcripts.transcribe(
  audio_url: audio_url,
  speaker_labels: true
)

# Exit with the API's error message when the transcript status is ERROR.
abort transcript.error if transcript.status == AssemblyAI::Transcripts::TranscriptStatus::ERROR

puts transcript.text

# puts + format (rather than a bare printf) ends every utterance with a
# newline, so speakers are printed one per line instead of run together.
transcript.utterances.each do |utterance|
  puts format('Speaker %<speaker>s: %<text>s', speaker: utterance.speaker, text: utterance.text)
end

Improve transcription quality and readability

Reduce speaker misattribution and transcription errors, enabling cleaner data for NLP tasks and enhancing user experience in speech-to-text applications.

See benchmarks and latest improvements

Build confidently with accurate, multilingual speaker diarization

Maximize speaker count accuracy

Enhance conversation analysis and speaker-dependent AI models with industry-leading diarization accuracy. Our models achieve a 2.9% error rate, outperforming competitors in identifying the number of speakers.

See benchmarks and latest improvements

Broaden your application's reach

Support speaker diarization in 95 languages, enabling multilingual audio analysis and expanding your product's global market potential.

See how in docs

Make every voice count

Improve the readability of your transcriptions	Unlock call center insights	Create searchable, structured transcripts	Assess communication patterns
Optimize short-form content generation	Analyze agent vs. customer behavior	Reliable summarization and LLM analysis	Separate speakers to measure talk time

Join 200K+ developers building new experiences with voice data

CEO at Vidyo

"We have had a phenomenal experience so far. The integration was simple and easy for developers to get started. The accuracy is better than any other tools in the market (and we have tried them all). Highly recommend!"

CEO at Pathlight

"Works incredibly well out of the box. Allowed us to focus on product instead of infrastructure. As a result, we were able to bring a transformative new product to market in half the time."

Product Manager at Aloware

"The accuracy was strong, but the great documentation and unique models like Auto Chapters and Sentiment Analysis is what really won us over."

Chief Product Officer at CallRail

"Partnering with AssemblyAI has made it easy for us to deliver world-class voice intelligence powered by market-leading speech-to-text technology."

Unlock the value of voice data

Build what’s next on the platform powering thousands of the industry’s leading Voice AI apps.

Try our API for free Contact sales