Add vad_filter to ctranslate transcriber
Helps us to correctly detect the language if there is no voice in the first 30 seconds. Also helps to lower hallucinations.
commit c289c86741
parent fb5236f2af
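For context, a minimal sketch of how the flag ends up on the command line once the diff below is applied. This is not PeerTube's actual runner code: the helper name, binary path, file paths and the use of node:child_process are assumptions for illustration; only the argument names and values come from the diff.

import { spawn } from 'node:child_process'
import { once } from 'node:events'

// Hypothetical helper: spawns the whisper-ctranslate2 CLI with the same
// arguments the transcriber builds in the diff below.
async function runWhisperCtranslate2 (binaryPath: string, mediaFilePath: string, outputDir: string) {
  const args = [
    mediaFilePath,
    '--word_timestamps', 'True',
    // Voice activity detection drops silent segments before decoding, so
    // language detection does not run on a silent first 30 seconds and the
    // model has less pure silence to hallucinate text on.
    '--vad_filter', 'true',
    '--output_format', 'all',
    '--output_dir', outputDir
  ]

  const child = spawn(binaryPath, args, { stdio: 'inherit' })
  const [ exitCode ] = await once(child, 'close')

  if (exitCode !== 0) throw new Error(`Transcription failed with exit code ${exitCode}`)
}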
@@ -28,7 +28,11 @@ export function getCustomModelPath (modelName: CustomModelName) {
 // ---------------------------------------------------------------------------
 
-export async function checkAutoCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') {
+export async function checkAutoCaption (
+  servers: PeerTubeServer[],
+  uuid: string,
+  captionContains = new RegExp('^WEBVTT\\n\\n00:00.\\d{3} --> 00:')
+) {
   for (const server of servers) {
     const body = await server.captions.list({ videoId: uuid })
     expect(body.total).to.equal(1)
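The assertion in this hunk switches from an exact 'WEBVTT\n\n00:00.000 --> 00:' prefix check to a regular expression, presumably because VAD filtering can shift the first cue's start slightly away from 00:00.000. A small illustration of what the relaxed pattern accepts; the sample cues are made up:

const captionContains = new RegExp('^WEBVTT\\n\\n00:00.\\d{3} --> 00:')

captionContains.test('WEBVTT\n\n00:00.000 --> 00:03.240\nHello') // true, previous behaviour still passes
captionContains.test('WEBVTT\n\n00:00.480 --> 00:03.240\nHello') // true, first cue no longer has to start at exactly 0 ms
captionContains.test('WEBVTT\n\n00:01.200 --> 00:03.240\nHello') // false, the cue must still start within the first second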
@@ -35,6 +35,8 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
       ...modelArgs,
       '--word_timestamps',
       'True',
+      '--vad_filter',
+      'true',
       '--output_format',
       'all',
       '--output_dir',