Create Custom Length Subtitles

While our SRT/VTT endpoints do allow you to customize the maximum number of characters per caption using the chars_per_caption URL parameter in your API requests, there are some use cases that require a custom number of words in each subtitle. In this guide, we will demonstrate how to construct these subtitles yourself in Python!

Quickstart

import assemblyai as aai

aai.settings.api_key = "YOUR-API-KEY"

# Pass the list of speech models to request along with the transcription.
config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()

transcript = transcriber.transcribe("./my-audio.mp3", config)

# A failed transcription is reported through the transcript's status rather
# than an exception, so stop here instead of generating subtitles from it.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")

def second_to_timecode(x: float) -> str:
    """Format a time offset in seconds as an SRT timecode (``HH:MM:SS,mmm``).

    Args:
        x: Time offset in seconds; expected to be non-negative.

    Returns:
        The timecode string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    # Convert to whole milliseconds first and stay in integer arithmetic.
    # Splitting the float directly truncates values such as 1.001 s (stored
    # as 1.000999...) to ",000" instead of ",001".
    total_ms = int(round(x * 1000))
    hour, total_ms = divmod(total_ms, 3_600_000)
    minute, total_ms = divmod(total_ms, 60_000)
    second, millisecond = divmod(total_ms, 1000)

    return '%.2d:%.2d:%.2d,%.3d' % (hour, minute, second, millisecond)

def generate_subtitles_by_word_count(transcript, words_per_line):
    """Build SRT lines with at most ``words_per_line`` words per subtitle.

    Words are grouped sentence by sentence: a subtitle is emitted once it
    holds ``words_per_line`` words or when the sentence ends, so a subtitle
    never spans two sentences.

    Args:
        transcript: Object whose ``get_sentences()`` yields sentences with a
            ``words`` sequence; each word exposes ``text``, ``start`` and
            ``end`` (the timestamps are treated as milliseconds).
        words_per_line: Maximum number of words per subtitle. Values below 1
            result in one word per subtitle.

    Returns:
        A flat list of strings: for each subtitle its 1-based index, the
        ``start --> end`` timing line, the text, and an empty separator line.
    """
    output = []
    subtitle_index = 1  # Start subtitle index at 1
    current_words = []

    for sentence in transcript.get_sentences():
        last_position = len(sentence.words) - 1
        for position, word in enumerate(sentence.words):
            current_words.append(word)
            # Detect the sentence end by position rather than by comparing
            # word objects: a value-equality match against an earlier word
            # would cut the subtitle short.
            if len(current_words) < words_per_line and position != last_position:
                continue

            # Timestamps are divided by 1000 to get the seconds expected by
            # second_to_timecode.
            start_time = second_to_timecode(current_words[0].start / 1000)
            end_time = second_to_timecode(current_words[-1].end / 1000)
            subtitle_text = " ".join(w.text for w in current_words)
            output.append(str(subtitle_index))
            output.append("%s --> %s" % (start_time, end_time))
            output.append(subtitle_text)
            output.append("")
            current_words = []  # Reset for the next subtitle
            subtitle_index += 1

    return output

subs = generate_subtitles_by_word_count(transcript, 6)
# Write the file as UTF-8 explicitly: the platform's default encoding may not
# be able to represent every character that appears in a transcript.
with open(f"{transcript.id}.srt", 'w', encoding="utf-8") as o:
    final = '\n'.join(subs)
    o.write(final)

print("SRT file generated.")

Step-by-Step Instructions

pip install -U assemblyai

Create a main.py file and import the assemblyai package and set the API key.

import assemblyai as aai

aai.settings.api_key = "YOUR-API-KEY"

Create a TranscriptionConfig object that lists the speech models to use, and a Transcriber object.

config = aai.TranscriptionConfig(speech_models=["universal-3-pro", "universal-2"])
transcriber = aai.Transcriber()

Use the Transcriber object’s transcribe method and pass in the audio file’s path and the config as parameters. The transcribe method returns the results of the transcription as a transcript object, which is stored in the transcript variable.

transcript = transcriber.transcribe("./my-audio.mp3", config)

# A failed transcription is reported through the transcript's status rather
# than an exception, so stop here instead of generating subtitles from it.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")

Alternatively, you can pass in the URL of the publicly accessible audio file on the internet.

transcript = transcriber.transcribe("https://storage.googleapis.com/aai-docs-samples/espn.m4a", config)

Define a function that converts a time in seconds to an SRT timecode (HH:MM:SS,mmm).

def second_to_timecode(x: float) -> str:
    """Format a time offset in seconds as an SRT timecode (``HH:MM:SS,mmm``).

    Args:
        x: Time offset in seconds; expected to be non-negative.

    Returns:
        The timecode string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    # Convert to whole milliseconds first and stay in integer arithmetic.
    # Splitting the float directly truncates values such as 1.001 s (stored
    # as 1.000999...) to ",000" instead of ",001".
    total_ms = int(round(x * 1000))
    hour, total_ms = divmod(total_ms, 3_600_000)
    minute, total_ms = divmod(total_ms, 60_000)
    second, millisecond = divmod(total_ms, 1000)

    return '%.2d:%.2d:%.2d,%.3d' % (hour, minute, second, millisecond)

Define a function that iterates through the transcript's sentences and constructs a list of SRT lines, starting a new subtitle once the given number of words is reached or the sentence ends.

def generate_subtitles_by_word_count(transcript, words_per_line):
    """Build SRT lines with at most ``words_per_line`` words per subtitle.

    Words are grouped sentence by sentence: a subtitle is emitted once it
    holds ``words_per_line`` words or when the sentence ends, so a subtitle
    never spans two sentences.

    Args:
        transcript: Object whose ``get_sentences()`` yields sentences with a
            ``words`` sequence; each word exposes ``text``, ``start`` and
            ``end`` (the timestamps are treated as milliseconds).
        words_per_line: Maximum number of words per subtitle. Values below 1
            result in one word per subtitle.

    Returns:
        A flat list of strings: for each subtitle its 1-based index, the
        ``start --> end`` timing line, the text, and an empty separator line.
    """
    output = []
    subtitle_index = 1  # Start subtitle index at 1
    current_words = []

    for sentence in transcript.get_sentences():
        last_position = len(sentence.words) - 1
        for position, word in enumerate(sentence.words):
            current_words.append(word)
            # Detect the sentence end by position rather than by comparing
            # word objects: a value-equality match against an earlier word
            # would cut the subtitle short.
            if len(current_words) < words_per_line and position != last_position:
                continue

            # Timestamps are divided by 1000 to get the seconds expected by
            # second_to_timecode.
            start_time = second_to_timecode(current_words[0].start / 1000)
            end_time = second_to_timecode(current_words[-1].end / 1000)
            subtitle_text = " ".join(w.text for w in current_words)
            output.append(str(subtitle_index))
            output.append("%s --> %s" % (start_time, end_time))
            output.append(subtitle_text)
            output.append("")
            current_words = []  # Reset for the next subtitle
            subtitle_index += 1

    return output

Generate your subtitle file

subs = generate_subtitles_by_word_count(transcript, 6)
# Write the file as UTF-8 explicitly: the platform's default encoding may not
# be able to represent every character that appears in a transcript.
with open(f"{transcript.id}.srt", 'w', encoding="utf-8") as o:
    final = '\n'.join(subs)
    o.write(final)

print("SRT file generated.")

Run your script.

python main.py

Documentation Index

​Quickstart

​Step-by-Step Instructions

Quickstart

Step-by-Step Instructions