Better whisper logging
This commit is contained in:
parent
ebcf3329f5
commit
9ee467b9cd
|
@ -127,7 +127,7 @@
|
||||||
"create-torrent": "^6.0.15",
|
"create-torrent": "^6.0.15",
|
||||||
"deep-object-diff": "^1.1.0",
|
"deep-object-diff": "^1.1.0",
|
||||||
"email-templates": "^11.0.3",
|
"email-templates": "^11.0.3",
|
||||||
"execa": "^9.2.0",
|
"execa": "^9.3.0",
|
||||||
"express": "^4.18.1",
|
"express": "^4.18.1",
|
||||||
"express-rate-limit": "^7.1.1",
|
"express-rate-limit": "^7.1.1",
|
||||||
"express-validator": "^7.0.1",
|
"express-validator": "^7.0.1",
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
import { createLogger, transports } from 'winston'
|
||||||
|
|
||||||
|
/**
 * Create a winston logger with a single Console transport,
 * so tests have a logger that actually emits output.
 */
export function createConsoleLogger () {
  const consoleTransport = new transports.Console()

  return createLogger({
    transports: [ consoleTransport ]
  })
}
|
|
@ -1,5 +1,5 @@
|
||||||
import { TranscriptionEngineName, transcriberFactory } from '@peertube/peertube-transcription'
|
import { TranscriptionEngineName, transcriberFactory } from '@peertube/peertube-transcription'
|
||||||
import { createLogger } from 'winston'
|
import { createConsoleLogger } from '@tests/shared/common.js'
|
||||||
|
|
||||||
describe('Transcriber factory', function () {
|
describe('Transcriber factory', function () {
|
||||||
const transcribers: TranscriptionEngineName[] = [ 'openai-whisper', 'whisper-ctranslate2' ]
|
const transcribers: TranscriptionEngineName[] = [ 'openai-whisper', 'whisper-ctranslate2' ]
|
||||||
|
@ -8,9 +8,8 @@ describe('Transcriber factory', function () {
|
||||||
|
|
||||||
for (const transcriberName of transcribers) {
|
for (const transcriberName of transcribers) {
|
||||||
it(`Should be able to create a(n) ${transcriberName} transcriber`, function () {
|
it(`Should be able to create a(n) ${transcriberName} transcriber`, function () {
|
||||||
transcriberFactory.createFromEngineName({ engineName: transcriberName, logger: createLogger() })
|
transcriberFactory.createFromEngineName({ engineName: transcriberName, logger: createConsoleLogger() })
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
|
@ -7,12 +7,12 @@ import {
|
||||||
WhisperBuiltinModel
|
WhisperBuiltinModel
|
||||||
} from '@peertube/peertube-transcription'
|
} from '@peertube/peertube-transcription'
|
||||||
import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools'
|
import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools'
|
||||||
|
import { createConsoleLogger } from '@tests/shared/common.js'
|
||||||
import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js'
|
import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js'
|
||||||
import { config, expect } from 'chai'
|
import { config, expect } from 'chai'
|
||||||
import { ensureDir, remove } from 'fs-extra/esm'
|
import { ensureDir, remove } from 'fs-extra/esm'
|
||||||
import { tmpdir } from 'node:os'
|
import { tmpdir } from 'node:os'
|
||||||
import { join } from 'node:path'
|
import { join } from 'node:path'
|
||||||
import { createLogger } from 'winston'
|
|
||||||
|
|
||||||
config.truncateThreshold = 0
|
config.truncateThreshold = 0
|
||||||
|
|
||||||
|
@ -38,7 +38,7 @@ describe('Open AI Whisper transcriber', function () {
|
||||||
languageDetection: true,
|
languageDetection: true,
|
||||||
version: ''
|
version: ''
|
||||||
},
|
},
|
||||||
logger: createLogger()
|
logger: createConsoleLogger()
|
||||||
})
|
})
|
||||||
const model = new TranscriptionModel('tiny')
|
const model = new TranscriptionModel('tiny')
|
||||||
|
|
||||||
|
|
|
@ -7,12 +7,12 @@ import {
|
||||||
TranscriptionModel
|
TranscriptionModel
|
||||||
} from '@peertube/peertube-transcription'
|
} from '@peertube/peertube-transcription'
|
||||||
import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools'
|
import { TranscriptFileEvaluator, levenshteinDistance } from '@peertube/peertube-transcription-devtools'
|
||||||
|
import { createConsoleLogger } from '@tests/shared/common.js'
|
||||||
import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js'
|
import { downloadCustomModelsIfNeeded, getCustomModelPath } from '@tests/shared/transcription.js'
|
||||||
import { config, expect } from 'chai'
|
import { config, expect } from 'chai'
|
||||||
import { ensureDir, remove } from 'fs-extra/esm'
|
import { ensureDir, remove } from 'fs-extra/esm'
|
||||||
import { tmpdir } from 'node:os'
|
import { tmpdir } from 'node:os'
|
||||||
import { join } from 'node:path'
|
import { join } from 'node:path'
|
||||||
import { createLogger } from 'winston'
|
|
||||||
|
|
||||||
config.truncateThreshold = 0
|
config.truncateThreshold = 0
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ describe('Whisper CTranslate2 transcriber', function () {
|
||||||
languageDetection: true,
|
languageDetection: true,
|
||||||
version: '0.4.4'
|
version: '0.4.4'
|
||||||
},
|
},
|
||||||
logger: createLogger()
|
logger: createConsoleLogger()
|
||||||
})
|
})
|
||||||
|
|
||||||
const model = new TranscriptionModel('tiny')
|
const model = new TranscriptionModel('tiny')
|
||||||
|
@ -156,7 +156,7 @@ describe('Whisper CTranslate2 transcriber', function () {
|
||||||
supportedModelFormats: [ 'PyTorch' ],
|
supportedModelFormats: [ 'PyTorch' ],
|
||||||
version: '0.4.4'
|
version: '0.4.4'
|
||||||
},
|
},
|
||||||
logger: createLogger()
|
logger: createConsoleLogger()
|
||||||
})
|
})
|
||||||
const openaiTranscript = await openaiTranscriber.transcribe({
|
const openaiTranscript = await openaiTranscriber.transcribe({
|
||||||
...transcribeArgs,
|
...transcribeArgs,
|
||||||
|
|
|
@ -99,7 +99,7 @@ void (async () => {
|
||||||
|
|
||||||
const transcriber = transcriberFactory.createFromEngineName({
|
const transcriber = transcriberFactory.createFromEngineName({
|
||||||
engineName: transcriberName,
|
engineName: transcriberName,
|
||||||
logger: createLogger(),
|
logger: createLogger({ transports: [ new transports.Console() ] }),
|
||||||
binDirectory: join(pipDirectory, 'bin')
|
binDirectory: join(pipDirectory, 'bin')
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import { SimpleLogger } from '@peertube/peertube-models'
|
import { SimpleLogger } from '@peertube/peertube-models'
|
||||||
import { buildSUUID, SUUID } from '@peertube/peertube-node-utils'
|
import { buildSUUID, SUUID } from '@peertube/peertube-node-utils'
|
||||||
|
import { $ } from 'execa'
|
||||||
import { PerformanceObserver } from 'node:perf_hooks'
|
import { PerformanceObserver } from 'node:perf_hooks'
|
||||||
import { join } from 'path'
|
import { join } from 'path'
|
||||||
import { TranscriptFile, TranscriptFormat } from './transcript-file.js'
|
import { TranscriptFile, TranscriptFormat } from './transcript-file.js'
|
||||||
|
@ -75,6 +76,26 @@ export abstract class AbstractTranscriber {
|
||||||
return this.engine.command
|
return this.engine.command
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build an execa `$` runner whose verbose events are forwarded to this transcriber's logger
 * instead of being printed by execa.
 *
 * Each verbose event is logged at the level mapped from its `type`:
 * `error` events go to `logger.error`, the other listed types go to `logger.debug`.
 *
 * @param env - optional environment variables handed to execa through its `env` option
 * @returns the configured execa `$` function
 */
protected getExec (env?: { [ id: string ]: string }) {
  const logLevels: Record<string, 'debug' | 'error'> = {
    command: 'debug',
    output: 'debug',
    ipc: 'debug',
    error: 'error',
    duration: 'debug'
  }

  return $({
    verbose: (_verboseLine, { message, ...verboseObject }) => {
      // Fall back to `debug` for an event type that is not listed above:
      // without it `level` would be undefined and the logger call would throw inside the callback
      const level = logLevels[verboseObject.type] ?? 'debug'

      this.logger[level](message, verboseObject)
    },

    env
  })
}
|
||||||
|
|
||||||
abstract transcribe (options: TranscribeArgs): Promise<TranscriptFile>
|
abstract transcribe (options: TranscribeArgs): Promise<TranscriptFile>
|
||||||
|
|
||||||
abstract install (path: string): Promise<void>
|
abstract install (path: string): Promise<void>
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
import { SimpleLogger } from '@peertube/peertube-models'
|
import { SimpleLogger } from '@peertube/peertube-models'
|
||||||
import { buildSUUID, SUUID } from '@peertube/peertube-node-utils'
|
import { buildSUUID, SUUID } from '@peertube/peertube-node-utils'
|
||||||
import { createLogger } from 'winston'
|
|
||||||
|
|
||||||
export class TranscriptionRun {
|
export class TranscriptionRun {
|
||||||
uuid: SUUID
|
uuid: SUUID
|
||||||
logger: SimpleLogger
|
logger: SimpleLogger
|
||||||
|
|
||||||
constructor (logger: SimpleLogger = createLogger(), uuid: SUUID = buildSUUID()) {
|
constructor (logger: SimpleLogger, uuid: SUUID = buildSUUID()) {
|
||||||
this.uuid = uuid
|
this.uuid = uuid
|
||||||
this.logger = logger
|
this.logger = logger
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import { buildSUUID } from '@peertube/peertube-node-utils'
|
import { buildSUUID } from '@peertube/peertube-node-utils'
|
||||||
import { $ } from 'execa'
|
|
||||||
import assert from 'node:assert'
|
import assert from 'node:assert'
|
||||||
import { lstat } from 'node:fs/promises'
|
import { lstat } from 'node:fs/promises'
|
||||||
import { TranscribeArgs } from '../../abstract-transcriber.js'
|
import { TranscribeArgs } from '../../abstract-transcriber.js'
|
||||||
|
@ -20,7 +19,7 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
|
||||||
}: TranscribeArgs): Promise<TranscriptFile> {
|
}: TranscribeArgs): Promise<TranscriptFile> {
|
||||||
this.assertLanguageDetectionAvailable(language)
|
this.assertLanguageDetectionAvailable(language)
|
||||||
|
|
||||||
const $$ = $({ env: this.getExecEnv() })
|
const $$ = this.getExec(this.getExecEnv())
|
||||||
|
|
||||||
if (model.path) {
|
if (model.path) {
|
||||||
assert(await lstat(model.path).then(stats => stats.isDirectory()), 'Model path must be a path to a directory.')
|
assert(await lstat(model.path).then(stats => stats.isDirectory()), 'Model path must be a path to a directory.')
|
||||||
|
@ -56,7 +55,7 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
|
||||||
}
|
}
|
||||||
|
|
||||||
async install (directory: string) {
|
async install (directory: string) {
|
||||||
const $$ = $({ verbose: 'full' })
|
const $$ = this.getExec()
|
||||||
|
|
||||||
await $$`pip3 install -U -t ${directory} whisper-ctranslate2==${this.engine.version}`
|
await $$`pip3 install -U -t ${directory} whisper-ctranslate2==${this.engine.version}`
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import { buildSUUID } from '@peertube/peertube-node-utils'
|
import { buildSUUID } from '@peertube/peertube-node-utils'
|
||||||
import { $ } from 'execa'
|
|
||||||
import { readJSON } from 'fs-extra/esm'
|
import { readJSON } from 'fs-extra/esm'
|
||||||
import { parse } from 'node:path'
|
import { parse } from 'node:path'
|
||||||
import { join, resolve } from 'path'
|
import { join, resolve } from 'path'
|
||||||
|
@ -18,7 +17,8 @@ export class OpenaiTranscriber extends AbstractTranscriber {
|
||||||
}: TranscribeArgs): Promise<TranscriptFile> {
|
}: TranscribeArgs): Promise<TranscriptFile> {
|
||||||
this.assertLanguageDetectionAvailable(language)
|
this.assertLanguageDetectionAvailable(language)
|
||||||
|
|
||||||
const $$ = $({ env: this.getExecEnv() })
|
const $$ = this.getExec(this.getExecEnv())
|
||||||
|
|
||||||
const languageArgs = language ? [ '--language', language ] : []
|
const languageArgs = language ? [ '--language', language ] : []
|
||||||
|
|
||||||
this.createRun(runId)
|
this.createRun(runId)
|
||||||
|
@ -64,7 +64,7 @@ export class OpenaiTranscriber extends AbstractTranscriber {
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
async install (directory: string) {
|
async install (directory: string) {
|
||||||
const $$ = $({ verbose: 'full' })
|
const $$ = this.getExec()
|
||||||
|
|
||||||
await $$`pip3 install -U -t ${[ directory ]} openai-whisper==${this.engine.version}`
|
await $$`pip3 install -U -t ${[ directory ]} openai-whisper==${this.engine.version}`
|
||||||
}
|
}
|
||||||
|
|
|
@ -5346,10 +5346,10 @@ execa@^5.0.0:
|
||||||
signal-exit "^3.0.3"
|
signal-exit "^3.0.3"
|
||||||
strip-final-newline "^2.0.0"
|
strip-final-newline "^2.0.0"
|
||||||
|
|
||||||
execa@^9.2.0:
|
execa@^9.3.0:
|
||||||
version "9.2.0"
|
version "9.3.0"
|
||||||
resolved "https://registry.yarnpkg.com/execa/-/execa-9.2.0.tgz#ec5e9de67a714d0f47ce073d37a851fbf0c2f688"
|
resolved "https://registry.yarnpkg.com/execa/-/execa-9.3.0.tgz#b10b70f52c1a978985e8492cc1fa74795c59963c"
|
||||||
integrity sha512-vpOyYg7UAVKLAWWtRS2gAdgkT7oJbCn0me3gmUmxZih4kd3MF/oo8kNTBTIbkO3yuuF5uB4ZCZfn8BOolITYhg==
|
integrity sha512-l6JFbqnHEadBoVAVpN5dl2yCyfX28WoBAGaoQcNmLLSedOxTxcn2Qa83s8I/PA5i56vWru2OHOtrwF7Om2vqlg==
|
||||||
dependencies:
|
dependencies:
|
||||||
"@sindresorhus/merge-streams" "^4.0.0"
|
"@sindresorhus/merge-streams" "^4.0.0"
|
||||||
cross-spawn "^7.0.3"
|
cross-spawn "^7.0.3"
|
||||||
|
|
Loading…
Reference in New Issue