Documentation Index: Fetch the complete documentation index at https://assemblyai.com/docs/llms.txt
Use this file to discover all available pages before exploring further.
While our SRT/VTT endpoints allow you to customize the maximum number of characters per caption using the `chars_per_caption` URL parameter in your API requests, some use cases require a custom number of words in each subtitle instead.
In this guide, we will demonstrate how to construct these subtitles yourself in Python!
Quickstart
import assemblyai as aai
# Replace the placeholder with your AssemblyAI API key before running.
aai.settings.api_key = "YOUR-API-KEY"

# Transcription options: the speech models to request for this job.
config = aai.TranscriptionConfig(
    speech_models=["universal-3-pro", "universal-2"],
)

# Submit the local audio file with the options above and keep the result
# for the subtitle generation further down.
transcriber = aai.Transcriber()
transcript = transcriber.transcribe("./my-audio.mp3", config)
def second_to_timecode(x: float) -> str:
    """Convert a time offset in seconds to an SRT timecode.

    Args:
        x: Non-negative offset from the start of the media, in seconds.

    Returns:
        The offset formatted as ``HH:MM:SS,mmm``.

    Raises:
        ValueError: If ``x`` is negative.
    """
    if x < 0:
        raise ValueError(f"timecode offset must be non-negative, got {x!r}")

    # Round to whole milliseconds first and do the rest in integers.
    # Truncating the float remainder instead loses a millisecond on values
    # such as 1.001, whose fractional part is stored as 0.000999....
    total_ms = round(x * 1000)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
def generate_subtitles_by_word_count(transcript, words_per_line):
    """Build SRT subtitle lines containing at most ``words_per_line`` words each.

    Subtitles never span a sentence boundary: the final subtitle of a
    sentence simply holds whatever words are left over.

    Args:
        transcript: Object whose ``get_sentences()`` yields sentences; each
            sentence has a ``words`` sequence whose items carry ``text``,
            ``start`` and ``end`` (timestamps in milliseconds).
        words_per_line: Maximum number of words per subtitle; must be >= 1.

    Returns:
        A flat list of strings, four per subtitle: the 1-based index, the
        ``start --> end`` timing line, the subtitle text, and an empty
        string that becomes the blank separator line when joined with
        newlines.

    Raises:
        ValueError: If ``words_per_line`` is less than 1.
    """
    if words_per_line < 1:
        raise ValueError("words_per_line must be at least 1")

    output = []
    subtitle_index = 1  # SRT cue numbers start at 1

    for sentence in transcript.get_sentences():
        words = sentence.words
        # Slice by position instead of comparing each word to the last one:
        # value equality could misfire if two words compared equal.
        for offset in range(0, len(words), words_per_line):
            chunk = words[offset:offset + words_per_line]
            # Word timestamps are in milliseconds; the formatter takes seconds.
            start_time = second_to_timecode(chunk[0].start / 1000)
            end_time = second_to_timecode(chunk[-1].end / 1000)

            output.append(str(subtitle_index))
            output.append(f"{start_time} --> {end_time}")
            output.append(" ".join(word.text for word in chunk))
            output.append("")
            subtitle_index += 1

    return output
# Fail fast with the API's error message if the transcription did not
# succeed, rather than trying to build subtitles from it.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")

subs = generate_subtitles_by_word_count(transcript, 6)

# Write explicitly as UTF-8 so non-ASCII transcript text is stored the same
# way regardless of the platform's default encoding.
with open(f"{transcript.id}.srt", "w", encoding="utf-8") as o:
    o.write("\n".join(subs))

print("SRT file generated.")
See all 47 lines
Step-by-Step Instructions
# Install the AssemblyAI Python SDK, upgrading it if it is already installed.
pip install -U assemblyai
Create a main.py file and import the assemblyai package and set the API key.
import assemblyai as aai
# Replace the placeholder with your AssemblyAI API key before running.
aai.settings.api_key = "YOUR-API-KEY"
Create a Transcriber object.
# Transcription options: the speech models to request for this job.
config = aai.TranscriptionConfig(
    speech_models=["universal-3-pro", "universal-2"],
)

# Client object used to submit audio for transcription.
transcriber = aai.Transcriber()
Use the Transcriber object's transcribe method and pass in the audio file's path along with the config. The transcribe method returns the result of the transcription as a transcript object, which we store in the transcript variable.
# Transcribe a local audio file using the options in `config`.
transcript = transcriber.transcribe("./my-audio.mp3", config)
Alternatively, you can pass in the URL of the publicly accessible audio file on the internet.
# Transcribe audio hosted at a publicly accessible URL instead of a local file.
transcript = transcriber.transcribe(
    "https://storage.googleapis.com/aai-docs-samples/espn.m4a",
    config,
)
Define a function that converts a time in seconds to an SRT timecode (HH:MM:SS,mmm).
def second_to_timecode(x: float) -> str:
    """Convert a time offset in seconds to an SRT timecode.

    Args:
        x: Non-negative offset from the start of the media, in seconds.

    Returns:
        The offset formatted as ``HH:MM:SS,mmm``.

    Raises:
        ValueError: If ``x`` is negative.
    """
    if x < 0:
        raise ValueError(f"timecode offset must be non-negative, got {x!r}")

    # Round to whole milliseconds first and do the rest in integers.
    # Truncating the float remainder instead loses a millisecond on values
    # such as 1.001, whose fractional part is stored as 0.000999....
    total_ms = round(x * 1000)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
Define a function that iterates through the transcript's sentences and builds a list of subtitle lines containing at most the requested number of words per subtitle.
def generate_subtitles_by_word_count(transcript, words_per_line):
    """Build SRT subtitle lines containing at most ``words_per_line`` words each.

    Subtitles never span a sentence boundary: the final subtitle of a
    sentence simply holds whatever words are left over.

    Args:
        transcript: Object whose ``get_sentences()`` yields sentences; each
            sentence has a ``words`` sequence whose items carry ``text``,
            ``start`` and ``end`` (timestamps in milliseconds).
        words_per_line: Maximum number of words per subtitle; must be >= 1.

    Returns:
        A flat list of strings, four per subtitle: the 1-based index, the
        ``start --> end`` timing line, the subtitle text, and an empty
        string that becomes the blank separator line when joined with
        newlines.

    Raises:
        ValueError: If ``words_per_line`` is less than 1.
    """
    if words_per_line < 1:
        raise ValueError("words_per_line must be at least 1")

    output = []
    subtitle_index = 1  # SRT cue numbers start at 1

    for sentence in transcript.get_sentences():
        words = sentence.words
        # Slice by position instead of comparing each word to the last one:
        # value equality could misfire if two words compared equal.
        for offset in range(0, len(words), words_per_line):
            chunk = words[offset:offset + words_per_line]
            # Word timestamps are in milliseconds; the formatter takes seconds.
            start_time = second_to_timecode(chunk[0].start / 1000)
            end_time = second_to_timecode(chunk[-1].end / 1000)

            output.append(str(subtitle_index))
            output.append(f"{start_time} --> {end_time}")
            output.append(" ".join(word.text for word in chunk))
            output.append("")
            subtitle_index += 1

    return output
See all 23 lines
Generate your subtitle file
# Fail fast with the API's error message if the transcription did not
# succeed, rather than trying to build subtitles from it.
if transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {transcript.error}")

subs = generate_subtitles_by_word_count(transcript, 6)

# Write explicitly as UTF-8 so non-ASCII transcript text is stored the same
# way regardless of the platform's default encoding.
with open(f"{transcript.id}.srt", "w", encoding="utf-8") as o:
    o.write("\n".join(subs))

print("SRT file generated.")
Run your script.