Extract Quotes with Timestamps Using LLM Gateway + Semantic Search

This guide will demonstrate how to use AssemblyAI’s LLM Gateway framework to process an audio file and find the best quotes included in it through Semantic Search.

Quickstart

Python
JavaScript

import datetime
import numpy as np
import requests
import time
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer

# Configuration
# Replace the placeholder with your AssemblyAI API key. `headers` sends it as
# the "authorization" header on every request made below.
api_key = "<YOUR_API_KEY>"
base_url = "https://api.assemblyai.com"
headers = {"authorization": api_key}

def upload_file(file_path):
    """Send a local audio file to AssemblyAI and return its upload URL.

    The file is opened in binary mode and passed as the request body to
    ``{base_url}/v2/upload``. On a non-200 response the status and body are
    printed and ``raise_for_status()`` is called.
    """
    upload_endpoint = f"{base_url}/v2/upload"
    with open(file_path, "rb") as audio_file:
        upload_response = requests.post(upload_endpoint, headers=headers, data=audio_file)

    if upload_response.status_code != 200:
        print(f"Error uploading: {upload_response.status_code}, {upload_response.text}")
        upload_response.raise_for_status()
    return upload_response.json()["upload_url"]

def transcribe_audio(audio_url):
    """Submit audio for transcription and poll until it finishes.

    Args:
        audio_url: URL of the audio to transcribe (for example the value
            returned by ``upload_file``).

    Returns:
        The transcript JSON (a dict) once its ``status`` is ``"completed"``.

    Raises:
        requests.HTTPError: If the submit request or a polling request
            returns an HTTP error status.
        RuntimeError: If the transcript reports ``status == "error"``.
    """
    data = {
        "audio_url": audio_url,
        "speech_models": ["universal-3-pro"],
        "auto_highlights": False,
        "sentiment_analysis": False,
        "entity_detection": False
    }

    response = requests.post(f"{base_url}/v2/transcript", headers=headers, json=data)

    if response.status_code != 200:
        print(f"Error submitting transcription: {response.status_code}, {response.text}")
        response.raise_for_status()

    transcript_id = response.json()["id"]
    polling_endpoint = f"{base_url}/v2/transcript/{transcript_id}"

    print("Transcribing...")
    while True:
        poll_response = requests.get(polling_endpoint, headers=headers)
        # A failed poll (bad key, rate limit, server error) carries no
        # "status" field; report the HTTP failure instead of a KeyError.
        if poll_response.status_code != 200:
            print(f"Error polling transcription: {poll_response.status_code}, {poll_response.text}")
            poll_response.raise_for_status()

        transcript = poll_response.json()
        if transcript["status"] == "completed":
            print("Transcription completed!")
            return transcript
        elif transcript["status"] == "error":
            raise RuntimeError(f"Transcription failed: {transcript['error']}")
        else:
            # Still queued or processing: wait before asking again.
            time.sleep(3)

def get_sentences(transcript_id):
    """Fetch the sentence-level breakdown of a finished transcript.

    Returns the ``"sentences"`` entry of the ``/sentences`` endpoint response.
    A non-200 response is printed and passed to ``raise_for_status()``.
    """
    url = f"{base_url}/v2/transcript/{transcript_id}/sentences"
    sentences_response = requests.get(url, headers=headers)

    request_succeeded = sentences_response.status_code == 200
    if not request_succeeded:
        print(f"Error getting sentences: {sentences_response.status_code}, {sentences_response.text}")
        sentences_response.raise_for_status()

    payload = sentences_response.json()
    return payload["sentences"]

def process_with_llm_gateway(transcript_text, question, context=""):
    """Ask the LLM Gateway a question about a transcript.

    Args:
        transcript_text: Full transcript text to include in the prompt.
        question: Question the model should answer.
        context: Optional extra instructions inserted into the prompt.

    Returns:
        The message content of the first choice in the response.

    Raises:
        RuntimeError: If the response JSON contains an ``"error"`` entry.
        requests.HTTPError: If the request failed with an HTTP error status
            and the body carried no ``"error"`` entry.
    """
    prompt = f"""Based on the following transcript, please answer this question:
            Question: {question}
            Context: {context}
            Transcript: {transcript_text}
            Please provide a clear and specific answer."""

    llm_gateway_data = {
        "model": "claude-sonnet-4-5-20250929",
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "max_tokens": 2000
    }

    response = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data
    )

    try:
        result = response.json()
    except ValueError:
        # The body is not JSON (for example a proxy error page): report the
        # HTTP failure if there was one, otherwise re-raise the decode error.
        response.raise_for_status()
        raise

    if "error" in result:
        raise RuntimeError(f"LLM Gateway error: {result['error']}")

    # Without this check a failed request whose body has no "error" key would
    # surface as a KeyError on "choices" below.
    response.raise_for_status()

    return result['choices'][0]['message']['content']

def sliding_window(elements, distance, stride):
    """Group ``elements`` into overlapping windows.

    Args:
        elements: Sequence to split (anything supporting ``len`` and slicing).
        distance: Maximum number of elements per window.
        stride: Number of elements shared by consecutive windows; each window
            starts ``distance - stride`` elements after the previous one.

    Returns:
        A list of slices of ``elements``. For ``0 <= stride < distance`` every
        element appears in at least one window; the final window may hold
        fewer than ``distance`` items when the input does not divide evenly.

    Raises:
        ValueError: If ``distance`` is not positive or ``stride`` is not
            smaller than ``distance`` (the window would never advance).
    """
    step = distance - stride
    if distance <= 0 or step <= 0:
        raise ValueError("distance must be positive and greater than stride")

    total = len(elements)
    results = []
    idx = 0
    while idx < total:
        results.append(elements[idx:idx + distance])
        # Stop once a window reaches the end so the tail is covered exactly once.
        if idx + distance >= total:
            break
        idx += step
    return results

# Main execution
# If using a local file:
audio_url = upload_file("<YOUR_AUDIO FILE>")

# If using a public URL:
# audio_url = "<YOUR_AUDIO_URL>"

# Transcribe audio
transcript = transcribe_audio(audio_url)
transcript_text = transcript["text"]
transcript_id = transcript["id"]

# Get sentences
print("Getting sentences...")
sentences = get_sentences(transcript_id)

# Initialize embedder
embedder = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
# Maps (start, end, transcript_id, combined_text) -> embedding vector.
embeddings = {}

# Create sliding window of sentences and generate embeddings
print("Creating embeddings...")
sentence_groups = sliding_window(sentences, 5, 2)

for sentence_group in sentence_groups:
    combined_text = " ".join(sentence["text"] for sentence in sentence_group)
    start = sentence_group[0]["start"]
    end = sentence_group[-1]["end"]

    embeddings[(start, end, transcript_id, combined_text)] = embedder.encode(combined_text)

# Stop before calling the LLM if there is nothing to search against.
if not embeddings:
    raise SystemExit("No sentence groups were created; the transcript is too short to search.")

# Use LLM Gateway to find the best quotes
print("Asking LLM Gateway for best quotes...")
question = "What are the 3 best quotes from this video?"
context = "Please provide exactly 3 quotes."

llm_answer = process_with_llm_gateway(transcript_text, question, context)
print(f"\nLLM Gateway Response:\n{llm_answer}\n")

# Embed the LLM output
llm_gateway_embedding = embedder.encode(llm_answer)

# Vectorize transcript embeddings
np_embeddings = np.array(list(embeddings.values()))
metadata = list(embeddings.keys())

# Find the top 3 most similar quotes
print("Finding matching quotes in transcript...")
# NearestNeighbors rejects n_neighbors larger than the number of fitted
# samples, so cap it for short transcripts.
top_k = min(3, len(metadata))
knn = NearestNeighbors(n_neighbors=top_k, metric="cosine")
knn.fit(np_embeddings)
distances, indices = knn.kneighbors([llm_gateway_embedding])

matches = []
for distance, index in zip(distances[0], indices[0]):
    result_metadata = metadata[index]
    matches.append(
        {
            "start_timestamp": result_metadata[0],
            "end_timestamp": result_metadata[1],
            "transcript_id": result_metadata[2],
            "text": result_metadata[3],
            # Cosine distance is 1 - cosine similarity; report the similarity.
            "confidence": 1 - distance,
        }
    )

# Display results
print("\n" + "="*80)
print("BEST MATCHING QUOTES FROM TRANSCRIPT:")
print("="*80 + "\n")

for index, m in enumerate(matches):
    print('QUOTE #{}: "{}"'.format(index + 1, m['text']))
    # Timestamps are divided by 1000 to convert them to seconds for timedelta.
    print('START TIMESTAMP:', str(datetime.timedelta(seconds=m['start_timestamp']/1000)))
    print('END TIMESTAMP:', str(datetime.timedelta(seconds=m['end_timestamp']/1000)))
    print('CONFIDENCE:', m['confidence'])
    print()

import { pipeline } from "@xenova/transformers";

// Configuration
// Replace the placeholder with your AssemblyAI API key; `headers` sends it as
// the "authorization" header on each request below.
const apiKey = "<YOUR_API_KEY>";
const baseUrl = "https://api.assemblyai.com";
const headers = { authorization: apiKey };

function msToTime(ms) {
  // Format a millisecond offset as H:MM:SS (fractions of a second are dropped).
  const pad = (value) => String(value).padStart(2, "0");
  const wholeSeconds = Math.floor(ms / 1000);
  const hours = Math.floor(wholeSeconds / 3600);
  const minutes = Math.floor((wholeSeconds % 3600) / 60);
  const seconds = wholeSeconds % 60;
  return [hours, pad(minutes), pad(seconds)].join(":");
}

function cosineSimilarity(a, b) {
  // Cosine of the angle between a and b: dot(a, b) / (|a| * |b|).
  const magnitude = (vec) => {
    let sumOfSquares = 0;
    vec.forEach((component) => {
      sumOfSquares += component * component;
    });
    return Math.sqrt(sumOfSquares);
  };

  let dotProduct = 0;
  a.forEach((component, i) => {
    dotProduct += component * b[i];
  });

  return dotProduct / (magnitude(a) * magnitude(b));
}

async function uploadFile(filePath) {
  // Read a local audio file and POST its bytes to the AssemblyAI upload
  // endpoint; resolves to the `upload_url` field of the JSON response.
  const fsModule = await import("fs");
  const audioBytes = await fsModule.default.promises.readFile(filePath);
  const uploadResponse = await fetch(`${baseUrl}/v2/upload`, {
    method: "POST",
    headers,
    body: audioBytes,
  });
  if (!uploadResponse.ok) {
    throw new Error(`Error uploading: ${uploadResponse.status}`);
  }
  const body = await uploadResponse.json();
  return body.upload_url;
}

async function transcribeAudio(audioUrl) {
  // Create a transcript job for `audioUrl`, then poll it every 3 seconds.
  // Resolves to the transcript JSON once its status is "completed"; throws if
  // a request fails or the transcript reports status "error".
  const requestBody = JSON.stringify({
    audio_url: audioUrl,
    speech_models: ["universal-3-pro"],
    auto_highlights: false,
    sentiment_analysis: false,
    entity_detection: false,
  });

  const submitResponse = await fetch(`${baseUrl}/v2/transcript`, {
    method: "POST",
    headers: { ...headers, "Content-Type": "application/json" },
    body: requestBody,
  });

  if (!submitResponse.ok) {
    throw new Error(`Error submitting transcription: ${submitResponse.status}`);
  }

  const job = await submitResponse.json();
  const pollingEndpoint = `${baseUrl}/v2/transcript/${job.id}`;
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  console.log("Transcribing...");
  for (;;) {
    const pollResponse = await fetch(pollingEndpoint, { headers });
    if (!pollResponse.ok) throw new Error(`Error: ${pollResponse.status}`);

    const transcript = await pollResponse.json();
    if (transcript.status === "completed") {
      console.log("Transcription completed!");
      return transcript;
    }
    if (transcript.status === "error") {
      throw new Error(`Transcription failed: ${transcript.error}`);
    }
    await pause(3000);
  }
}

async function getSentences(transcriptId) {
  // Fetch the sentence list for a transcript; resolves to the `sentences`
  // field of the response and throws on a non-OK HTTP status.
  const sentencesResponse = await fetch(
    `${baseUrl}/v2/transcript/${transcriptId}/sentences`,
    { headers }
  );

  if (!sentencesResponse.ok) {
    throw new Error(`Error getting sentences: ${sentencesResponse.status}`);
  }

  const body = await sentencesResponse.json();
  return body.sentences;
}

async function processWithLlmGateway(transcriptText, question, context = "") {
  // Send the transcript plus a question to LLM Gateway and resolve to the
  // message content of the first choice. Throws on a non-OK HTTP status or
  // when the response JSON carries an `error` field.
  const prompt = `Based on the following transcript, please answer this question:
            Question: ${question}
            Context: ${context}
            Transcript: ${transcriptText}
            Please provide a clear and specific answer.`;

  const llmGatewayData = {
    model: "claude-sonnet-4-5-20250929",
    messages: [
      {
        role: "user",
        content: prompt,
      },
    ],
    max_tokens: 2000,
  };

  const res = await fetch("https://llm-gateway.assemblyai.com/v1/chat/completions", {
    method: "POST",
    headers: { ...headers, "Content-Type": "application/json" },
    body: JSON.stringify(llmGatewayData),
  });
  if (!res.ok) throw new Error(`Error: ${res.status}`);
  const response = await res.json();

  if (response.error) {
    // The error may be a structured object; stringify it so the message is
    // readable instead of "[object Object]".
    const detail =
      typeof response.error === "string"
        ? response.error
        : JSON.stringify(response.error);
    throw new Error(`LLM Gateway error: ${detail}`);
  }

  return response.choices[0].message.content;
}

function slidingWindow(elements, distance, stride) {
  // Group `elements` into overlapping windows of at most `distance` items.
  // `stride` is the number of items shared by consecutive windows, so each
  // window starts `distance - stride` items after the previous one. The last
  // window may be shorter so that trailing items are not dropped.
  // Throws RangeError when the window could never advance.
  const step = distance - stride;
  if (!(distance > 0) || !(step > 0)) {
    throw new RangeError("distance must be positive and greater than stride");
  }

  const results = [];
  let idx = 0;
  while (idx < elements.length) {
    results.push(elements.slice(idx, idx + distance));
    // Stop once a window reaches the end so the tail is covered exactly once.
    if (idx + distance >= elements.length) break;
    idx += step;
  }
  return results;
}

// Main execution
// If using a local file:
const audioUrl = await uploadFile("<YOUR_AUDIO FILE>");

// If using a public URL:
// const audioUrl = "<YOUR_AUDIO_URL>";

// Transcribe audio
// transcribeAudio only resolves once the transcript status is "completed".
const transcript = await transcribeAudio(audioUrl);
const transcriptText = transcript.text;
const transcriptId = transcript.id;

// Get sentences
console.log("Getting sentences...");
const sentences = await getSentences(transcriptId);

// Initialize embedder
const embedder = await pipeline(
  "feature-extraction",
  "Xenova/multi-qa-mpnet-base-dot-v1"
);
// Parallel arrays: embeddingKeys[i] is the metadata for embeddingValues[i].
const embeddingKeys = [];
const embeddingValues = [];

// Create sliding window of sentences and generate embeddings
console.log("Creating embeddings...");
const sentenceGroups = slidingWindow(sentences, 5, 2);

for (const sentenceGroup of sentenceGroups) {
  const combinedText = sentenceGroup.map((s) => s.text).join(" ");
  // The group's time span runs from its first sentence's start to its last
  // sentence's end.
  const start = sentenceGroup[0].start;
  const end = sentenceGroup[sentenceGroup.length - 1].end;

  const output = await embedder(combinedText, {
    pooling: "mean",
    normalize: true,
  });
  const embedding = Array.from(output.data);

  embeddingKeys.push({ start, end, transcriptId, combinedText });
  embeddingValues.push(embedding);
}

// Use LLM Gateway to find the best quotes
console.log("Asking LLM Gateway for best quotes...");
const question = "What are the 3 best quotes from this video?";
const context = "Please provide exactly 3 quotes.";

const llmAnswer = await processWithLlmGateway(
  transcriptText,
  question,
  context
);
console.log(`\nLLM Gateway Response:\n${llmAnswer}\n`);

// Embed the LLM output
// Same embedder options as the sentence groups so the vectors are comparable.
const llmOutput = await embedder(llmAnswer, { pooling: "mean", normalize: true });
const llmGatewayEmbedding = Array.from(llmOutput.data);

// Find the top 3 most similar quotes
console.log("Finding matching quotes in transcript...");
const similarities = embeddingKeys.map((key, idx) => ({
  key,
  similarity: cosineSimilarity(llmGatewayEmbedding, embeddingValues[idx]),
}));
// Highest similarity first; slice(0, 3) then keeps at most three groups.
similarities.sort((a, b) => b.similarity - a.similarity);
const matches = similarities.slice(0, 3).map(({ key, similarity }) => ({
  startTimestamp: key.start,
  endTimestamp: key.end,
  transcriptId: key.transcriptId,
  text: key.combinedText,
  confidence: similarity,
}));

// Display results
console.log("\n" + "=".repeat(80));
console.log("BEST MATCHING QUOTES FROM TRANSCRIPT:");
console.log("=".repeat(80) + "\n");

matches.forEach((m, index) => {
  console.log(`QUOTE #${index + 1}: "${m.text}"`);
  console.log("START TIMESTAMP:", msToTime(m.startTimestamp));
  console.log("END TIMESTAMP:", msToTime(m.endTimestamp));
  console.log("CONFIDENCE:", m.confidence);
  console.log();
});

Getting Started

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for an AssemblyAI account and get your API key from your dashboard.

Step-by-Step Instructions

Install the required packages:

Python
JavaScript

You’ll need to install a few libraries that this code depends on:

NumPy, a scientific computing library.
Scikit-Learn, a library for predictive data analysis.
Sentence-Transformers, a framework for state-of-the-art sentence and text embedding.

pip install -U numpy scikit-learn sentence-transformers

npm install @xenova/transformers

Then import all of these libraries and set our AssemblyAI API key, headers, and base URL.

Python
JavaScript

import datetime
import numpy as np
import requests
import time
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer

# Configuration
# Replace the placeholder with your AssemblyAI API key. `headers` sends it as
# the "authorization" header on every request made with it.
api_key = "<YOUR_API_KEY>"
base_url = "https://api.assemblyai.com"
headers = {"authorization": api_key}

import { pipeline } from "@xenova/transformers";

// Configuration
// Replace the placeholder with your AssemblyAI API key; `headers` sends it as
// the "authorization" header on each request made with it.
const apiKey = "<YOUR_API_KEY>";
const baseUrl = "https://api.assemblyai.com";
const headers = { authorization: apiKey };

Next, define functions to upload and transcribe files using AssemblyAI’s Async API, as well as request sentences.

Python
JavaScript

def upload_file(file_path):
    """Send a local audio file to AssemblyAI and return its upload URL.

    The file is opened in binary mode and passed as the request body to
    ``{base_url}/v2/upload``. On a non-200 response the status and body are
    printed and ``raise_for_status()`` is called.
    """
    upload_endpoint = f"{base_url}/v2/upload"
    with open(file_path, "rb") as audio_file:
        upload_response = requests.post(upload_endpoint, headers=headers, data=audio_file)

    if upload_response.status_code != 200:
        print(f"Error uploading: {upload_response.status_code}, {upload_response.text}")
        upload_response.raise_for_status()
    return upload_response.json()["upload_url"]

def transcribe_audio(audio_url):
    """Submit audio for transcription and poll until it finishes.

    Args:
        audio_url: URL of the audio to transcribe (for example the value
            returned by ``upload_file``).

    Returns:
        The transcript JSON (a dict) once its ``status`` is ``"completed"``.

    Raises:
        requests.HTTPError: If the submit request or a polling request
            returns an HTTP error status.
        RuntimeError: If the transcript reports ``status == "error"``.
    """
    data = {
        "audio_url": audio_url,
        "speech_models": ["universal-3-pro"],
        "auto_highlights": False,
        "sentiment_analysis": False,
        "entity_detection": False
    }

    response = requests.post(f"{base_url}/v2/transcript", headers=headers, json=data)

    if response.status_code != 200:
        print(f"Error submitting transcription: {response.status_code}, {response.text}")
        response.raise_for_status()

    transcript_id = response.json()["id"]
    polling_endpoint = f"{base_url}/v2/transcript/{transcript_id}"

    print("Transcribing...")
    while True:
        poll_response = requests.get(polling_endpoint, headers=headers)
        # A failed poll (bad key, rate limit, server error) carries no
        # "status" field; report the HTTP failure instead of a KeyError.
        if poll_response.status_code != 200:
            print(f"Error polling transcription: {poll_response.status_code}, {poll_response.text}")
            poll_response.raise_for_status()

        transcript = poll_response.json()
        if transcript["status"] == "completed":
            print("Transcription completed!")
            return transcript
        elif transcript["status"] == "error":
            raise RuntimeError(f"Transcription failed: {transcript['error']}")
        else:
            # Still queued or processing: wait before asking again.
            time.sleep(3)

def get_sentences(transcript_id):
    """Fetch the sentence-level breakdown of a finished transcript.

    Returns the ``"sentences"`` entry of the ``/sentences`` endpoint response.
    A non-200 response is printed and passed to ``raise_for_status()``.
    """
    url = f"{base_url}/v2/transcript/{transcript_id}/sentences"
    sentences_response = requests.get(url, headers=headers)

    request_succeeded = sentences_response.status_code == 200
    if not request_succeeded:
        print(f"Error getting sentences: {sentences_response.status_code}, {sentences_response.text}")
        sentences_response.raise_for_status()

    payload = sentences_response.json()
    return payload["sentences"]

async function uploadFile(filePath) {
  // Read a local audio file and POST its bytes to the AssemblyAI upload
  // endpoint; resolves to the `upload_url` field of the JSON response.
  const fsModule = await import("fs");
  const audioBytes = await fsModule.default.promises.readFile(filePath);
  const uploadResponse = await fetch(`${baseUrl}/v2/upload`, {
    method: "POST",
    headers,
    body: audioBytes,
  });
  if (!uploadResponse.ok) {
    throw new Error(`Error uploading: ${uploadResponse.status}`);
  }
  const body = await uploadResponse.json();
  return body.upload_url;
}

async function transcribeAudio(audioUrl) {
  // Create a transcript job for `audioUrl`, then poll it every 3 seconds.
  // Resolves to the transcript JSON once its status is "completed"; throws if
  // a request fails or the transcript reports status "error".
  const requestBody = JSON.stringify({
    audio_url: audioUrl,
    speech_models: ["universal-3-pro"],
    auto_highlights: false,
    sentiment_analysis: false,
    entity_detection: false,
  });

  const submitResponse = await fetch(`${baseUrl}/v2/transcript`, {
    method: "POST",
    headers: { ...headers, "Content-Type": "application/json" },
    body: requestBody,
  });

  if (!submitResponse.ok) {
    throw new Error(`Error submitting transcription: ${submitResponse.status}`);
  }

  const job = await submitResponse.json();
  const pollingEndpoint = `${baseUrl}/v2/transcript/${job.id}`;
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  console.log("Transcribing...");
  for (;;) {
    const pollResponse = await fetch(pollingEndpoint, { headers });
    if (!pollResponse.ok) throw new Error(`Error: ${pollResponse.status}`);

    const transcript = await pollResponse.json();
    if (transcript.status === "completed") {
      console.log("Transcription completed!");
      return transcript;
    }
    if (transcript.status === "error") {
      throw new Error(`Transcription failed: ${transcript.error}`);
    }
    await pause(3000);
  }
}

async function getSentences(transcriptId) {
  // Fetch the sentence list for a transcript; resolves to the `sentences`
  // field of the response and throws on a non-OK HTTP status.
  const sentencesResponse = await fetch(
    `${baseUrl}/v2/transcript/${transcriptId}/sentences`,
    { headers }
  );

  if (!sentencesResponse.ok) {
    throw new Error(`Error getting sentences: ${sentencesResponse.status}`);
  }

  const body = await sentencesResponse.json();
  return body.sentences;
}

Then define a function to process each transcript text with LLM Gateway.

Python
JavaScript

def process_with_llm_gateway(transcript_text, question, context=""):
    """Ask the LLM Gateway a question about a transcript.

    Args:
        transcript_text: Full transcript text to include in the prompt.
        question: Question the model should answer.
        context: Optional extra instructions inserted into the prompt.

    Returns:
        The message content of the first choice in the response.

    Raises:
        RuntimeError: If the response JSON contains an ``"error"`` entry.
        requests.HTTPError: If the request failed with an HTTP error status
            and the body carried no ``"error"`` entry.
    """
    prompt = f"""Based on the following transcript, please answer this question:
            Question: {question}
            Context: {context}
            Transcript: {transcript_text}
            Please provide a clear and specific answer."""

    llm_gateway_data = {
        "model": "claude-sonnet-4-5-20250929",
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "max_tokens": 2000
    }

    response = requests.post(
        "https://llm-gateway.assemblyai.com/v1/chat/completions",
        headers=headers,
        json=llm_gateway_data
    )

    try:
        result = response.json()
    except ValueError:
        # The body is not JSON (for example a proxy error page): report the
        # HTTP failure if there was one, otherwise re-raise the decode error.
        response.raise_for_status()
        raise

    if "error" in result:
        raise RuntimeError(f"LLM Gateway error: {result['error']}")

    # Without this check a failed request whose body has no "error" key would
    # surface as a KeyError on "choices" below.
    response.raise_for_status()

    return result['choices'][0]['message']['content']

async function processWithLlmGateway(transcriptText, question, context = "") {
  // Send the transcript plus a question to LLM Gateway and resolve to the
  // message content of the first choice. Throws on a non-OK HTTP status or
  // when the response JSON carries an `error` field.
  const prompt = `Based on the following transcript, please answer this question:
            Question: ${question}
            Context: ${context}
            Transcript: ${transcriptText}
            Please provide a clear and specific answer.`;

  const llmGatewayData = {
    model: "claude-sonnet-4-5-20250929",
    messages: [
      {
        role: "user",
        content: prompt,
      },
    ],
    max_tokens: 2000,
  };

  const res = await fetch("https://llm-gateway.assemblyai.com/v1/chat/completions", {
    method: "POST",
    headers: { ...headers, "Content-Type": "application/json" },
    body: JSON.stringify(llmGatewayData),
  });
  if (!res.ok) throw new Error(`Error: ${res.status}`);
  const response = await res.json();

  if (response.error) {
    // The error may be a structured object; stringify it so the message is
    // readable instead of "[object Object]".
    const detail =
      typeof response.error === "string"
        ? response.error
        : JSON.stringify(response.error);
    throw new Error(`LLM Gateway error: ${detail}`);
  }

  return response.choices[0].message.content;
}

Define a function to implement a sliding window, which allows us to group sentences together in different combinations to retain their semantic meaning and context while also enabling us to customize the length (and thus duration) of the quotes.

Python
JavaScript

def sliding_window(elements, distance, stride):
    """Group ``elements`` into overlapping windows.

    Args:
        elements: Sequence to split (anything supporting ``len`` and slicing).
        distance: Maximum number of elements per window.
        stride: Number of elements shared by consecutive windows; each window
            starts ``distance - stride`` elements after the previous one.

    Returns:
        A list of slices of ``elements``. For ``0 <= stride < distance`` every
        element appears in at least one window; the final window may hold
        fewer than ``distance`` items when the input does not divide evenly.

    Raises:
        ValueError: If ``distance`` is not positive or ``stride`` is not
            smaller than ``distance`` (the window would never advance).
    """
    step = distance - stride
    if distance <= 0 or step <= 0:
        raise ValueError("distance must be positive and greater than stride")

    total = len(elements)
    results = []
    idx = 0
    while idx < total:
        results.append(elements[idx:idx + distance])
        # Stop once a window reaches the end so the tail is covered exactly once.
        if idx + distance >= total:
            break
        idx += step
    return results

function slidingWindow(elements, distance, stride) {
  // Group `elements` into overlapping windows of at most `distance` items.
  // `stride` is the number of items shared by consecutive windows, so each
  // window starts `distance - stride` items after the previous one. The last
  // window may be shorter so that trailing items are not dropped.
  // Throws RangeError when the window could never advance.
  const step = distance - stride;
  if (!(distance > 0) || !(step > 0)) {
    throw new RangeError("distance must be positive and greater than stride");
  }

  const results = [];
  let idx = 0;
  while (idx < elements.length) {
    results.push(elements.slice(idx, idx + distance));
    // Stop once a window reaches the end so the tail is covered exactly once.
    if (idx + distance >= elements.length) break;
    idx += step;
  }
  return results;
}

Execute all upload and transcription functions.

Python
JavaScript

# Main execution
# If using a local file:
audio_url = upload_file("<YOUR_AUDIO FILE>")

# If using a public URL:
# audio_url = "<YOUR_AUDIO_URL>"

# Transcribe audio
# transcribe_audio only returns once the transcript status is "completed".
transcript = transcribe_audio(audio_url)
transcript_text = transcript["text"]
transcript_id = transcript["id"]

# Get sentences
print("Getting sentences...")
sentences = get_sentences(transcript_id)

// Main execution
// If using a local file:
const audioUrl = await uploadFile("<YOUR_AUDIO FILE>");

// If using a public URL:
// const audioUrl = "<YOUR_AUDIO_URL>";

// Transcribe audio
// transcribeAudio only resolves once the transcript status is "completed".
const transcript = await transcribeAudio(audioUrl);
const transcriptText = transcript.text;
const transcriptId = transcript.id;

// Get sentences
console.log("Getting sentences...");
const sentences = await getSentences(transcriptId);

Now we can iterate over all of the sentences in our transcript and create embeddings for them to use as part of our Semantic Search later. We’ll be relying on SentenceTransformer’s multi-qa-mpnet-base-dot-v1 model, which has been fine-tuned specifically for Semantic Search, and is their highest-performing model for this task. By default, we’ll group 5 sentences together while having 2 of them overlap when the window moves. This should give us quotes around 30 seconds in length at most.

Python
JavaScript

# Initialize embedder
embedder = SentenceTransformer("multi-qa-mpnet-base-dot-v1")
# Maps (start, end, transcript_id, combined_text) -> embedding vector.
embeddings = {}

# Create sliding window of sentences and generate embeddings
print("Creating embeddings...")
sentence_groups = sliding_window(sentences, 5, 2)

for sentence_group in sentence_groups:
    combined_text = " ".join([sentence["text"] for sentence in sentence_group])
    # The group's time span runs from its first sentence's start to its last
    # sentence's end.
    start = sentence_group[0]["start"]
    end = sentence_group[-1]["end"]

    embeddings[(start, end, transcript_id, combined_text)] = embedder.encode(combined_text)

// Initialize embedder
const embedder = await pipeline(
  "feature-extraction",
  "Xenova/multi-qa-mpnet-base-dot-v1"
);
// Parallel arrays: embeddingKeys[i] is the metadata for embeddingValues[i].
const embeddingKeys = [];
const embeddingValues = [];

// Create sliding window of sentences and generate embeddings
console.log("Creating embeddings...");
const sentenceGroups = slidingWindow(sentences, 5, 2);

for (const sentenceGroup of sentenceGroups) {
  const combinedText = sentenceGroup.map((s) => s.text).join(" ");
  // The group's time span runs from its first sentence's start to its last
  // sentence's end.
  const start = sentenceGroup[0].start;
  const end = sentenceGroup[sentenceGroup.length - 1].end;

  const output = await embedder(combinedText, {
    pooling: "mean",
    normalize: true,
  });
  const embedding = Array.from(output.data);

  embeddingKeys.push({ start, end, transcriptId, combinedText });
  embeddingValues.push(embedding);
}

Now we can query LLM Gateway to provide the type of quotes we want. In this case, let’s prompt LLM Gateway to find the best 3 quotes out of a video that we transcribed.

Python
JavaScript

# Use LLM Gateway to find the best quotes
print("Asking LLM Gateway for best quotes...")
# Both strings are interpolated into the prompt built by process_with_llm_gateway.
question = "What are the 3 best quotes from this video?"
context = "Please provide exactly 3 quotes."

llm_answer = process_with_llm_gateway(transcript_text, question, context)
print(f"\nLLM Gateway Response:\n{llm_answer}\n")

// Use LLM Gateway to find the best quotes
console.log("Asking LLM Gateway for best quotes...");
// Both strings are interpolated into the prompt built by processWithLlmGateway.
const question = "What are the 3 best quotes from this video?";
const context = "Please provide exactly 3 quotes.";

const llmAnswer = await processWithLlmGateway(
  transcriptText,
  question,
  context
);
console.log(`\nLLM Gateway Response:\n${llmAnswer}\n`);

Now we can take the embeddings from the transcript text, as well as the embeddings from LLM Gateway’s output, and use them in our k-nearest neighbors algorithm to determine their similarity. The most similar quotes to what LLM Gateway identified will be surfaced as our 3 best quotes, along with their timestamps and confidence scores. We’ll be relying on cosine similarity rather than the default Euclidean distance metric since it compares only the direction of our vectors and ignores their magnitude, which makes the comparison insensitive to the length (norm) of the embeddings.

Python
JavaScript

# Nothing to search if the transcript was too short to form a sentence group.
if not embeddings:
    raise SystemExit("No sentence groups were created; the transcript is too short to search.")

# Embed the LLM output
llm_gateway_embedding = embedder.encode(llm_answer)

# Vectorize transcript embeddings
np_embeddings = np.array(list(embeddings.values()))
metadata = list(embeddings.keys())

# Find the top 3 most similar quotes
print("Finding matching quotes in transcript...")
# NearestNeighbors rejects n_neighbors larger than the number of fitted
# samples, so cap it for short transcripts.
top_k = min(3, len(metadata))
knn = NearestNeighbors(n_neighbors=top_k, metric="cosine")
knn.fit(np_embeddings)
distances, indices = knn.kneighbors([llm_gateway_embedding])

matches = []
for distance, index in zip(distances[0], indices[0]):
    result_metadata = metadata[index]
    matches.append(
        {
            "start_timestamp": result_metadata[0],
            "end_timestamp": result_metadata[1],
            "transcript_id": result_metadata[2],
            "text": result_metadata[3],
            # Cosine distance is 1 - cosine similarity; report the similarity.
            "confidence": 1 - distance,
        }
    )

# Display results
print("\n" + "="*80)
print("BEST MATCHING QUOTES FROM TRANSCRIPT:")
print("="*80 + "\n")

for index, m in enumerate(matches):
    print('QUOTE #{}: "{}"'.format(index + 1, m['text']))
    # Timestamps are divided by 1000 to convert them to seconds for timedelta.
    print('START TIMESTAMP:', str(datetime.timedelta(seconds=m['start_timestamp']/1000)))
    print('END TIMESTAMP:', str(datetime.timedelta(seconds=m['end_timestamp']/1000)))
    print('CONFIDENCE:', m['confidence'])
    print()

// Embed the LLM output
// Same embedder options as the sentence groups so the vectors are comparable.
const llmOutput = await embedder(llmAnswer, {
  pooling: "mean",
  normalize: true,
});
const llmGatewayEmbedding = Array.from(llmOutput.data);

// Find the top 3 most similar quotes
console.log("Finding matching quotes in transcript...");

function cosineSimilarity(a, b) {
  // Cosine of the angle between a and b: dot(a, b) / (|a| * |b|).
  const magnitude = (vec) => {
    let sumOfSquares = 0;
    vec.forEach((component) => {
      sumOfSquares += component * component;
    });
    return Math.sqrt(sumOfSquares);
  };

  let dotProduct = 0;
  a.forEach((component, i) => {
    dotProduct += component * b[i];
  });

  return dotProduct / (magnitude(a) * magnitude(b));
}

function msToTime(ms) {
  // Format a millisecond offset as H:MM:SS (fractions of a second are dropped).
  const pad = (value) => String(value).padStart(2, "0");
  const wholeSeconds = Math.floor(ms / 1000);
  const hours = Math.floor(wholeSeconds / 3600);
  const minutes = Math.floor((wholeSeconds % 3600) / 60);
  const seconds = wholeSeconds % 60;
  return [hours, pad(minutes), pad(seconds)].join(":");
}

// Score every sentence group against the LLM answer's embedding.
const similarities = embeddingKeys.map((key, idx) => ({
  key,
  similarity: cosineSimilarity(llmGatewayEmbedding, embeddingValues[idx]),
}));
// Highest similarity first; slice(0, 3) then keeps at most three groups.
similarities.sort((a, b) => b.similarity - a.similarity);
const matches = similarities.slice(0, 3).map(({ key, similarity }) => ({
  startTimestamp: key.start,
  endTimestamp: key.end,
  transcriptId: key.transcriptId,
  text: key.combinedText,
  confidence: similarity,
}));

// Display results
console.log("\n" + "=".repeat(80));
console.log("BEST MATCHING QUOTES FROM TRANSCRIPT:");
console.log("=".repeat(80) + "\n");

matches.forEach((m, index) => {
  console.log(`QUOTE #${index + 1}: "${m.text}"`);
  console.log("START TIMESTAMP:", msToTime(m.startTimestamp));
  console.log("END TIMESTAMP:", msToTime(m.endTimestamp));
  console.log("CONFIDENCE:", m.confidence);
  console.log();
});

Documentation Index

​Quickstart

​Getting Started

​Step-by-Step Instructions

Quickstart

Getting Started

Step-by-Step Instructions