Transcription with JavaScript SDK

Comprehensive guide to using all transcription features with the VerbalisAI JavaScript SDK.

Basic Transcription

Simple Audio Transcription

import { VerbalisAI } from '@verbalisai/sdk';

const client = new VerbalisAI();

async function basicTranscription() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/audio.mp3'
    });
    
    console.log('Transcription:', transcription.text);
    console.log(`Duration: ${transcription.duration} seconds`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

basicTranscription();

Model Selection

async function modelComparison() {
  const audioUrl = 'https://example.com/audio.mp3';
  
  try {
    // Nano model - fastest, English only
    const nanoResult = await client.transcriptions.create({
      audioUrl,
      model: 'nano'  // 3x faster than mini
    });
    
    // Mini model - balanced speed/accuracy  
    const miniResult = await client.transcriptions.create({
      audioUrl,
      model: 'mini'  // Default, good for most use cases
    });
    
    // Pro model - highest accuracy
    const proResult = await client.transcriptions.create({
      audioUrl,
      model: 'pro'  // Best accuracy, slower processing
    });
    
    console.log(`Nano: ${nanoResult.text}`);
    console.log(`Mini: ${miniResult.text}`);
    console.log(`Pro: ${proResult.text}`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

modelComparison();

Language Detection & Selection

Automatic Language Detection

async function autoLanguage() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/multilingual.mp3',
      language: 'auto'  // Default - detects language automatically
    });
    
    console.log(`Detected language: ${transcription.detectedLanguage}`);
    console.log(`Confidence: ${transcription.languageConfidence}`);
    console.log(`Text: ${transcription.text}`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

autoLanguage();

Specific Language Selection

async function specificLanguages() {
  const sources = {
    english: { url: 'https://example.com/english.mp3', language: 'en' },
    spanish: { url: 'https://example.com/spanish.mp3', language: 'es' },
    french: { url: 'https://example.com/french.mp3', language: 'fr' }
  };
  
  const results = {};
  
  try {
    for (const [name, { url, language }] of Object.entries(sources)) {
      const transcription = await client.transcriptions.create({
        audioUrl: url,
        language
      });
      
      results[name] = transcription.text;
    }
    
    console.log('Results:', results);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

specificLanguages();

Supported Languages

async function listSupportedLanguages() {
  try {
    // Get list of supported languages
    const languages = await client.languages.list();
    
    languages.forEach(lang => {
      console.log(`${lang.code}: ${lang.name} (${lang.accuracy}% avg accuracy)`);
    });
  } catch (error) {
    console.error('Error:', error.message);
  }
}

listSupportedLanguages();

Advanced Features

Speaker Diarization

async function speakerDiarization() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/meeting.mp3',
      model: 'mini',
      diarize: true,  // Enable speaker identification
      timestampStyle: 'word'  // Get word-level timestamps
    });
    
    // Group by speakers
    const speakers = {};
    transcription.segments.forEach(segment => {
      const speakerId = segment.speakerId || 'Unknown';
      if (!speakers[speakerId]) {
        speakers[speakerId] = [];
      }
      speakers[speakerId].push(segment);
    });
    
    // Print conversation by speaker
    Object.entries(speakers).forEach(([speakerId, segments]) => {
      console.log(`\n${speakerId}:`);
      segments.forEach(segment => {
        console.log(`  [${segment.start.toFixed(1)}s] ${segment.text}`);
      });
    });
  } catch (error) {
    console.error('Error:', error.message);
  }
}

speakerDiarization();

Topic Detection

async function topicDetection() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/business-call.mp3',
      topics: true,  // Enable topic detection
      model: 'pro'   // Pro model gives better topic accuracy
    });
    
    console.log('Detected Topics:');
    transcription.topics.forEach(topic => {
      console.log(`  - ${topic}`);
    });
    
    // Topics with confidence scores (if available)
    if (transcription.topicDetails) {
      transcription.topicDetails.forEach(detail => {
        console.log(`  ${detail.topic}: ${detail.confidence.toFixed(2)}`);
      });
    }
  } catch (error) {
    console.error('Error:', error.message);
  }
}

topicDetection();

Text Summarization

async function textSummarization() {
  const formats = ['bullets', 'paragraphs', 'markdown'];
  
  for (const formatType of formats) {
    try {
      const transcription = await client.transcriptions.create({
        audioUrl: 'https://example.com/long-meeting.mp3',
        summarization: true,
        summaryType: formatType,
        summaryLanguage: 'en'
      });
      
      console.log(`\n${formatType.toUpperCase()} SUMMARY:`);
      console.log(transcription.summary.text);
      console.log(`Summary length: ${transcription.summary.text.length} chars`);
    } catch (error) {
      console.error(`Error with ${formatType}:`, error.message);
    }
  }
}

textSummarization();

Entity Detection

async function entityDetection() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/business-call.mp3',
      entityDetection: true,
      entityTypes: [
        'person',
        'organization', 
        'location',
        'phoneNumber',
        'email',
        'date',
        'product'
      ]
    });
    
    // Group entities by type
    const entitiesByType = {};
    transcription.entities.forEach(entity => {
      if (!entitiesByType[entity.type]) {
        entitiesByType[entity.type] = [];
      }
      entitiesByType[entity.type].push(entity);
    });
    
    // Print entities by type
    Object.entries(entitiesByType).forEach(([type, entities]) => {
      console.log(`\n${type.toUpperCase()}:`);
      entities.forEach(entity => {
        console.log(`  - ${entity.text} (confidence: ${entity.confidence.toFixed(2)})`);
      });
    });
  } catch (error) {
    console.error('Error:', error.message);
  }
}

entityDetection();

Privacy & PII Redaction

Basic PII Redaction

async function basicPiiRedaction() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/customer-call.mp3',
      redactPii: true,
      redactPiiPolicies: [
        'person',
        'phoneNumber', 
        'email',
        'ssn',
        'creditCard'
      ],
      redactPiiSub: 'hash'  // Options: hash, mask, remove
    });
    
    console.log('The original audio contained PII; here is the redacted version:');
    console.log(transcription.text);
    console.log(`\nPII types found and redacted: ${transcription.redactedPiiTypes}`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

basicPiiRedaction();

Advanced PII Redaction

async function advancedPiiRedaction() {
  const redactionMethods = {
    hash: 'Replace with [REDACTED_HASH_123456]',
    mask: 'Replace with [***]', 
    remove: 'Remove completely'
  };
  
  for (const [method, description] of Object.entries(redactionMethods)) {
    try {
      const transcription = await client.transcriptions.create({
        audioUrl: 'https://example.com/sensitive-call.mp3',
        redactPii: true,
        redactPiiPolicies: [
          'person',
          'phoneNumber',
          'email', 
          'creditCard',
          'bankAccount'
        ],
        redactPiiSub: method
      });
      
      console.log(`\nREDACTION METHOD: ${method} (${description})`);
      console.log(transcription.text);
    } catch (error) {
      console.error(`Error with ${method}:`, error.message);
    }
  }
}

advancedPiiRedaction();

Healthcare PII Redaction

async function healthcarePii() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/patient-consultation.mp3',
      redactPii: true,
      redactPiiPolicies: [
        'person',           // Patient names
        'medicalId',        // Medical record numbers
        'phoneNumber',      // Contact information
        'email',           // Email addresses
        'address',         // Home addresses
        'date',            // Birth dates, appointment dates
        'insuranceId'      // Insurance information
      ],
      redactPiiSub: 'hash'
    });
    
    console.log('Healthcare transcription with PII redacted:');
    console.log(transcription.text);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

healthcarePii();

Timestamp Control

Segment vs Word Timestamps

async function timestampComparison() {
  const audioUrl = 'https://example.com/speech.mp3';
  
  try {
    // Segment-level timestamps (default)
    const segmentTranscription = await client.transcriptions.create({
      audioUrl,
      timestampStyle: 'segment'
    });
    
    console.log('SEGMENT TIMESTAMPS:');
    segmentTranscription.segments.forEach(segment => {
      console.log(`[${segment.start.toFixed(1)}s - ${segment.end.toFixed(1)}s]: ${segment.text}`);
    });
    
    // Word-level timestamps (more precise)
    const wordTranscription = await client.transcriptions.create({
      audioUrl,
      timestampStyle: 'word'
    });
    
    console.log('\nWORD TIMESTAMPS:');
    wordTranscription.segments.forEach(segment => {
      let output = `[${segment.start.toFixed(1)}s]: `;
      if (segment.words) {
        segment.words.forEach(word => {
          output += `${word.text}(${word.start.toFixed(1)}s) `;
        });
      }
      console.log(output);
    });
  } catch (error) {
    console.error('Error:', error.message);
  }
}

timestampComparison();
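
Word-level timestamps make it easy to answer questions like "what was said around second N". The helper below is a minimal sketch that relies only on the word fields shown above (each word has a start time and text); the function name is illustrative, not part of the SDK.

// Find the word whose start time is closest to targetSeconds.
// Relies only on the word-level fields shown above (start, text).
function wordAtTime(transcription, targetSeconds) {
  let closest = null;
  
  for (const segment of transcription.segments) {
    for (const word of segment.words || []) {
      if (closest === null || Math.abs(word.start - targetSeconds) < Math.abs(closest.start - targetSeconds)) {
        closest = word;
      }
    }
  }
  
  return closest;
}

// Example: const word = wordAtTime(wordTranscription, 12.5);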

Audio Slicing

async function audioSlicing() {
  try {
    // Transcribe only a portion of the audio
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/long-audio.mp3',
      audioStartFrom: 60,    // Start from 1 minute
      audioEndAt: 300,       // End at 5 minutes
      model: 'mini'
    });
    
    console.log('Transcribed audio from 1:00 to 5:00:');
    console.log(transcription.text);
    console.log(`Duration of transcribed portion: ${transcription.duration}s`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

audioSlicing();

Content Safety

Content Filtering

async function contentSafety() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/user-content.mp3',
      contentSafety: true,  // Enable content safety filtering
      model: 'pro'
    });
    
    if (transcription.contentFlags && transcription.contentFlags.length > 0) {
      console.log('Content flags detected:');
      transcription.contentFlags.forEach(flag => {
        console.log(`  - ${flag.type}: ${flag.description} (confidence: ${flag.confidence})`);
      });
    } else {
      console.log('No content safety issues detected');
    }
    
    console.log(`\nTranscription: ${transcription.text}`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

contentSafety();

Async Processing & Webhooks

Non-blocking Transcription

async function asyncTranscription() {
  try {
    // Start transcription without waiting
    let transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/very-long-audio.mp3',
      model: 'pro',
      waitUntilComplete: false  // Don't wait for completion
    });
    
    console.log(`Transcription started: ${transcription.id}`);
    console.log(`Status: ${transcription.status}`);
    
    // Poll for completion
    while (transcription.status === 'processing') {
      await new Promise(resolve => setTimeout(resolve, 5000));  // Wait 5 seconds
      transcription = await client.transcriptions.get(transcription.id);
      console.log(`Status: ${transcription.status}`);
    }
    
    if (transcription.status === 'completed') {
      console.log(`Transcription completed: ${transcription.text}`);
    } else {
      console.log(`Transcription failed: ${transcription.error}`);
    }
  } catch (error) {
    console.error('Error:', error.message);
  }
}

asyncTranscription();
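
If you poll for completion in more than one place, it can help to wrap the pattern above in a small helper. The sketch below is a minimal convenience wrapper, assuming the same 'processing'/'completed' status values shown above; the interval and timeout defaults are arbitrary choices, not SDK settings.

// Poll until the transcription leaves the 'processing' state or the timeout expires.
async function waitForCompletion(transcriptionId, { intervalMs = 5000, timeoutMs = 600000 } = {}) {
  const deadline = Date.now() + timeoutMs;
  
  let transcription = await client.transcriptions.get(transcriptionId);
  while (transcription.status === 'processing') {
    if (Date.now() > deadline) {
      throw new Error(`Timed out waiting for transcription ${transcriptionId}`);
    }
    await new Promise(resolve => setTimeout(resolve, intervalMs));
    transcription = await client.transcriptions.get(transcriptionId);
  }
  
  return transcription;
}

// Usage: const finished = await waitForCompletion(transcription.id);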

Webhook Integration

async function webhookTranscription() {
  try {
    // Start transcription with webhook notification
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/audio.mp3',
      model: 'pro',
      
      // Webhook configuration
      webhookUrl: 'https://yoursite.com/webhooks/transcription',
      webhookAuthHeaderName: 'Authorization',
      webhookAuthHeaderValue: 'Bearer your-webhook-secret',
      
      // Additional features
      topics: true,
      summarization: true,
      entityDetection: true,
      
      waitUntilComplete: false  // Use webhook instead of waiting
    });
    
    console.log(`Transcription started: ${transcription.id}`);
    console.log('You\'ll receive a webhook notification when complete');
  } catch (error) {
    console.error('Error:', error.message);
  }
}

webhookTranscription();
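
On the receiving side, your server handles a POST request when the transcription finishes. The Express handler below is a minimal sketch (Express is used purely for illustration): the auth check mirrors the webhookAuthHeaderName/webhookAuthHeaderValue configured above, but the payload fields used here (id, status, text) are assumptions, so check the webhook payload reference for the exact shape.

import express from 'express';

const app = express();
app.use(express.json());

app.post('/webhooks/transcription', (req, res) => {
  // Verify the header configured via webhookAuthHeaderName / webhookAuthHeaderValue
  if (req.headers['authorization'] !== 'Bearer your-webhook-secret') {
    return res.status(401).send('Unauthorized');
  }
  
  // Payload fields (id, status, text) are assumed here; verify against the payload docs
  const { id, status, text } = req.body;
  console.log(`Transcription ${id} finished with status: ${status}`);
  
  if (status === 'completed') {
    // Store or process the transcript here
    console.log(text);
  }
  
  res.status(200).send('OK');
});

app.listen(3000, () => console.log('Webhook receiver listening on port 3000'));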

Batch Processing

Process Multiple Files

async function batchTranscription() {
  const audioFiles = [
    'https://example.com/audio1.mp3',
    'https://example.com/audio2.mp3', 
    'https://example.com/audio3.mp3',
    'https://example.com/audio4.mp3',
    'https://example.com/audio5.mp3'
  ];
  
  try {
    // Process all files concurrently
    const promises = audioFiles.map(url => 
      client.transcriptions.create({
        audioUrl: url,
        model: 'mini',
        topics: true
      }).catch(error => ({ error: error.message, url }))
    );
    
    // Wait for all to complete
    const results = await Promise.all(promises);
    
    // Process results
    let successful = 0;
    let failed = 0;
    
    results.forEach((result, index) => {
      if (result.error) {
        console.log(`File ${index + 1} failed: ${result.error}`);
        failed++;
      } else {
        console.log(`File ${index + 1} completed: ${result.text.length} characters`);
        if (result.topics) {
          console.log(`  Topics: ${result.topics.join(', ')}`);
        }
        successful++;
      }
    });
    
    console.log(`\nBatch complete: ${successful} successful, ${failed} failed`);
  } catch (error) {
    console.error('Batch error:', error.message);
  }
}

batchTranscription();
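
If you prefer not to convert failures into result objects with .catch, Promise.allSettled achieves the same effect with built-in status tracking. A minimal sketch using only the request options already shown above:

async function batchWithAllSettled(audioFiles) {
  const settled = await Promise.allSettled(
    audioFiles.map(url => client.transcriptions.create({ audioUrl: url, model: 'mini' }))
  );
  
  settled.forEach((outcome, index) => {
    if (outcome.status === 'fulfilled') {
      console.log(`File ${index + 1} completed: ${outcome.value.text.length} characters`);
    } else {
      console.log(`File ${index + 1} failed: ${outcome.reason.message}`);
    }
  });
}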

Rate-Limited Batch Processing

class RateLimitedProcessor {
  constructor(maxConcurrent = 5) {
    this.maxConcurrent = maxConcurrent;
    this.running = 0;
    this.queue = [];
  }
  
  async process(fn) {
    return new Promise((resolve, reject) => {
      this.queue.push({ fn, resolve, reject });
      this.processNext();
    });
  }
  
  async processNext() {
    if (this.running >= this.maxConcurrent || this.queue.length === 0) {
      return;
    }
    
    this.running++;
    const { fn, resolve, reject } = this.queue.shift();
    
    try {
      const result = await fn();
      resolve(result);
    } catch (error) {
      reject(error);
    } finally {
      this.running--;
      this.processNext();
    }
  }
}

async function rateLimitedBatch() {
  const audioFiles = Array.from({ length: 20 }, (_, i) => 
    `https://example.com/audio${i + 1}.mp3`
  );
  
  const processor = new RateLimitedProcessor(5); // Max 5 concurrent requests
  
  try {
    const promises = audioFiles.map(url => 
      processor.process(async () => {
        try {
          const result = await client.transcriptions.create({
            audioUrl: url,
            model: 'mini'
          });
          return { url, success: true, result };
        } catch (error) {
          return { url, success: false, error: error.message };
        }
      })
    );
    
    const results = await Promise.all(promises);
    
    // Analyze results
    const successful = results.filter(r => r.success);
    const failed = results.filter(r => !r.success);
    
    console.log(`Processed ${audioFiles.length} files:`);
    console.log(`  Successful: ${successful.length}`);
    console.log(`  Failed: ${failed.length}`);
    
    if (failed.length > 0) {
      console.log('\nFailed files:');
      failed.forEach(failure => {
        console.log(`  ${failure.url}: ${failure.error}`);
      });
    }
  } catch (error) {
    console.error('Batch processing error:', error);
  }
}

rateLimitedBatch();

Error Handling & Retry Logic

Comprehensive Error Handling

import { VerbalisAIError } from '@verbalisai/sdk';

async function robustTranscription() {
  const maxRetries = 3;
  const retryDelay = 2000; // 2 seconds
  
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      const transcription = await client.transcriptions.create({
        audioUrl: 'https://example.com/audio.mp3',
        model: 'mini'
      });
      
      console.log(`Success on attempt ${attempt + 1}`);
      console.log(transcription.text);
      break;
      
    } catch (error) {
      console.log(`Attempt ${attempt + 1} failed: ${error.message}`);
      
      if (error instanceof VerbalisAIError) {
        // Don't retry on certain errors
        if ([400, 401, 403].includes(error.statusCode)) {
          console.log('Non-retryable error, giving up');
          break;
        }
        
        // Retry on server errors and rate limits
        if (attempt < maxRetries - 1) {
          const waitTime = retryDelay * Math.pow(2, attempt); // Exponential backoff
          console.log(`Retrying in ${waitTime}ms...`);
          await new Promise(resolve => setTimeout(resolve, waitTime));
        } else {
          console.log('Max retries exceeded');
        }
      } else {
        console.log(`Unexpected error: ${error.message}`);
        break;
      }
    }
  }
}

robustTranscription();

Performance Optimization

Memory-Efficient Processing

class StreamingTranscriptionProcessor {
  constructor(client) {
    this.client = client;
  }
  
  async processLargeFile(audioUrl) {
    try {
      // Start transcription with streaming enabled
      const transcription = await this.client.transcriptions.create({
        audioUrl,
        model: 'mini',
        streamSegments: true // Enable segment streaming
      });
      
      // Process segments as they arrive
      if (transcription.segmentsStream) {
        for await (const segment of transcription.segmentsStream) {
          console.log(`[${segment.start.toFixed(1)}s]: ${segment.text}`);
          
          // Process each segment immediately
          await this.processSegment(segment);
        }
      }
      
      return transcription;
    } catch (error) {
      console.error('Streaming error:', error);
      throw error;
    }
  }
  
  async processSegment(segment) {
    // Your segment processing logic here
    // (save to database, analyze, etc.)
  }
}

// Usage
const processor = new StreamingTranscriptionProcessor(client);
processor.processLargeFile('https://example.com/large-audio.mp3');
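
The processSegment hook above is intentionally left empty. As one illustration of what it could do, the subclass below appends each segment to a JSONL file as it arrives; this is a sketch that assumes a Node.js environment, and the file path is arbitrary.

import { appendFile } from 'node:fs/promises';

// Example subclass: persist segments to a JSONL file as they stream in (Node.js only)
class FileLoggingProcessor extends StreamingTranscriptionProcessor {
  async processSegment(segment) {
    const line = JSON.stringify({ start: segment.start, end: segment.end, text: segment.text });
    await appendFile('segments.jsonl', line + '\n');
  }
}

// Usage: new FileLoggingProcessor(client).processLargeFile('https://example.com/large-audio.mp3');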

Utility Functions

Export to Different Formats

class TranscriptionExporter {
  static toSRT(transcription) {
    let srt = '';
    transcription.segments.forEach((segment, index) => {
      const startTime = this.formatSRTTime(segment.start);
      const endTime = this.formatSRTTime(segment.end);
      
      srt += `${index + 1}\n`;
      srt += `${startTime} --> ${endTime}\n`;
      srt += `${segment.text}\n\n`;
    });
    return srt;
  }
  
  static toVTT(transcription) {
    let vtt = 'WEBVTT\n\n';
    transcription.segments.forEach(segment => {
      const startTime = this.formatVTTTime(segment.start);
      const endTime = this.formatVTTTime(segment.end);
      
      vtt += `${startTime} --> ${endTime}\n`;
      vtt += `${segment.text}\n\n`;
    });
    return vtt;
  }
  
  static toText(transcription) {
    return transcription.text;
  }
  
  static toJSON(transcription) {
    return JSON.stringify(transcription, null, 2);
  }
  
  static formatSRTTime(seconds) {
    const hours = Math.floor(seconds / 3600);
    const minutes = Math.floor((seconds % 3600) / 60);
    const secs = Math.floor(seconds % 60);
    const ms = Math.floor((seconds % 1) * 1000);
    
    return `${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')},${ms.toString().padStart(3, '0')}`;
  }
  
  static formatVTTTime(seconds) {
    const hours = Math.floor(seconds / 3600);
    const minutes = Math.floor((seconds % 3600) / 60);
    const secs = (seconds % 60).toFixed(3);
    
    return `${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${secs.padStart(6, '0')}`;
  }
}

// Usage
async function exportFormats() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/audio.mp3',
      timestampStyle: 'word',
      diarize: true
    });
    
    // Export in different formats
    const srtContent = TranscriptionExporter.toSRT(transcription);
    const vttContent = TranscriptionExporter.toVTT(transcription);
    const txtContent = TranscriptionExporter.toText(transcription);
    const jsonContent = TranscriptionExporter.toJSON(transcription);
    
    // In Node.js, you can write to files (dynamic import keeps this working under ESM)
    if (typeof window === 'undefined') {
      const fs = await import('node:fs/promises');
      await fs.writeFile('transcription.srt', srtContent);
      await fs.writeFile('transcription.vtt', vttContent);
      await fs.writeFile('transcription.txt', txtContent);
      await fs.writeFile('transcription.json', jsonContent);
      console.log('Files exported successfully');
    }
    
    // In browser, you can create download links
    if (typeof document !== 'undefined') {
      createDownloadLink('transcription.srt', srtContent);
      createDownloadLink('transcription.vtt', vttContent);
      createDownloadLink('transcription.txt', txtContent);
      createDownloadLink('transcription.json', jsonContent);
    }
  } catch (error) {
    console.error('Export error:', error);
  }
}

function createDownloadLink(filename, content) {
  const blob = new Blob([content], { type: 'text/plain' });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  URL.revokeObjectURL(url);
}

exportFormats();

Search Within Transcriptions

class TranscriptionSearcher {
  constructor(transcription) {
    this.transcription = transcription;
  }
  
  search(term, options = {}) {
    const { caseSensitive = false, wholeWord = false } = options;
    const text = caseSensitive ? this.transcription.text : this.transcription.text.toLowerCase();
    const searchTerm = caseSensitive ? term : term.toLowerCase();
    
    const matches = [];
    let index = 0;
    
    while (index < text.length) {
      const foundIndex = text.indexOf(searchTerm, index);
      if (foundIndex === -1) break;
      
      // Check for whole word match
      if (wholeWord) {
        const charBefore = foundIndex > 0 ? text[foundIndex - 1] : ' ';
        const charAfter = foundIndex + searchTerm.length < text.length 
          ? text[foundIndex + searchTerm.length] : ' ';
        
        if (!/\w/.test(charBefore) && !/\w/.test(charAfter)) {
          matches.push(this.createMatch(foundIndex, searchTerm.length));
        }
      } else {
        matches.push(this.createMatch(foundIndex, searchTerm.length));
      }
      
      index = foundIndex + 1;
    }
    
    return matches;
  }
  
  createMatch(startIndex, length) {
    // Find the segment containing this match
    const segment = this.findSegmentForIndex(startIndex);
    
    return {
      startIndex,
      endIndex: startIndex + length,
      text: this.transcription.text.substring(startIndex, startIndex + length),
      timestamp: segment ? segment.start : 0,
      context: this.getContext(startIndex, 50)
    };
  }
  
  findSegmentForIndex(textIndex) {
    let currentIndex = 0;
    
    for (const segment of this.transcription.segments) {
      const segmentEnd = currentIndex + segment.text.length;
      if (textIndex >= currentIndex && textIndex < segmentEnd) {
        return segment;
      }
      currentIndex = segmentEnd + 1; // +1 for space between segments
    }
    
    return null;
  }
  
  getContext(index, contextLength) {
    const start = Math.max(0, index - contextLength);
    const end = Math.min(this.transcription.text.length, index + contextLength);
    return this.transcription.text.substring(start, end);
  }
}

// Usage
async function searchTranscription() {
  try {
    const transcription = await client.transcriptions.create({
      audioUrl: 'https://example.com/meeting.mp3',
      timestampStyle: 'word'
    });
    
    const searcher = new TranscriptionSearcher(transcription);
    const searchTerms = ['action items', 'deadline', 'budget'];
    
    searchTerms.forEach(term => {
      const matches = searcher.search(term, { caseSensitive: false });
      
      if (matches.length > 0) {
        console.log(`\nFound '${term}' ${matches.length} times:`);
        matches.forEach(match => {
          console.log(`  [${match.timestamp.toFixed(1)}s]: ...${match.context}...`);
        });
      }
    });
  } catch (error) {
    console.error('Search error:', error);
  }
}

searchTranscription();

Ready to explore file storage? Check out the File Storage guide to learn about uploading and managing audio files with the JavaScript SDK.