Metadata to know if the caption is auto generated

This commit is contained in:
Chocobozzz 2024-06-27 15:29:26 +02:00
parent 1bfb791e05
commit fd4831e502
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
22 changed files with 101 additions and 29 deletions

View File

@ -4,6 +4,7 @@ export * from './cache-file-object.js'
export * from './common-objects.js' export * from './common-objects.js'
export * from './playlist-element-object.js' export * from './playlist-element-object.js'
export * from './playlist-object.js' export * from './playlist-object.js'
export * from './video-caption-object.js'
export * from './video-chapters-object.js' export * from './video-chapters-object.js'
export * from './video-comment-object.js' export * from './video-comment-object.js'
export * from './video-object.js' export * from './video-object.js'

View File

@ -0,0 +1,5 @@
import { ActivityIdentifierObject } from './common-objects.js'
/**
 * ActivityPub representation of a video caption.
 *
 * Extends the generic identifier object with a flag telling whether the
 * caption was automatically generated.
 */
export interface VideoCaptionObject extends ActivityIdentifierObject {
// true when the caption was generated automatically
automaticallyGenerated: boolean
}

View File

@ -6,6 +6,7 @@ import {
ActivityTagObject, ActivityTagObject,
ActivityUrlObject ActivityUrlObject
} from './common-objects.js' } from './common-objects.js'
import { VideoCaptionObject } from './video-caption-object.js'
import { VideoChapterObject } from './video-chapters-object.js' import { VideoChapterObject } from './video-chapters-object.js'
export interface VideoObject { export interface VideoObject {
@ -18,7 +19,7 @@ export interface VideoObject {
category: ActivityIdentifierObject category: ActivityIdentifierObject
licence: ActivityIdentifierObject licence: ActivityIdentifierObject
language: ActivityIdentifierObject language: ActivityIdentifierObject
subtitleLanguage: ActivityIdentifierObject[] subtitleLanguage: VideoCaptionObject[]
views: number views: number

View File

@ -73,6 +73,7 @@ export interface VideoExportJSON {
language: string language: string
filename: string filename: string
fileUrl: string fileUrl: string
automaticallyGenerated: boolean
}[] }[]
chapters: { chapters: {

View File

@ -3,5 +3,6 @@ import { VideoConstant } from '../video-constant.model.js'
export interface VideoCaption { export interface VideoCaption {
language: VideoConstant<string> language: VideoConstant<string>
captionPath: string captionPath: string
automaticallyGenerated: boolean
updatedAt: string updatedAt: string
} }

View File

@ -7,7 +7,7 @@
"scripts": { "scripts": {
"build": "tsc", "build": "tsc",
"watch": "tsc -w", "watch": "tsc -w",
"install-dependencies:transcription": "pip install -r ./requirements.txt ../transcription-devtools/requirements.txt" "install-dependencies:transcription": "pip install -r ./requirements.txt -r ../transcription-devtools/requirements.txt"
}, },
"dependencies": {} "dependencies": {}
} }

View File

@ -72,12 +72,14 @@ describe('Test video captions', function () {
expect(caption1.language.id).to.equal('ar') expect(caption1.language.id).to.equal('ar')
expect(caption1.language.label).to.equal('Arabic') expect(caption1.language.label).to.equal('Arabic')
expect(caption1.captionPath).to.match(new RegExp('^/lazy-static/video-captions/' + uuidRegex + '-ar.vtt$')) expect(caption1.captionPath).to.match(new RegExp('^/lazy-static/video-captions/' + uuidRegex + '-ar.vtt$'))
expect(caption1.automaticallyGenerated).to.be.false
await testCaptionFile(server.url, caption1.captionPath, 'Subtitle good 1.') await testCaptionFile(server.url, caption1.captionPath, 'Subtitle good 1.')
const caption2 = body.data[1] const caption2 = body.data[1]
expect(caption2.language.id).to.equal('zh') expect(caption2.language.id).to.equal('zh')
expect(caption2.language.label).to.equal('Chinese') expect(caption2.language.label).to.equal('Chinese')
expect(caption2.captionPath).to.match(new RegExp('^/lazy-static/video-captions/' + uuidRegex + '-zh.vtt$')) expect(caption2.captionPath).to.match(new RegExp('^/lazy-static/video-captions/' + uuidRegex + '-zh.vtt$'))
// Check the flag on caption2 itself (not caption1 again)
expect(caption2.automaticallyGenerated).to.be.false
await testCaptionFile(server.url, caption2.captionPath, 'Subtitle good 2.') await testCaptionFile(server.url, caption2.captionPath, 'Subtitle good 2.')
} }
}) })

View File

@ -13,7 +13,7 @@ import {
waitJobs waitJobs
} from '@peertube/peertube-server-commands' } from '@peertube/peertube-server-commands'
import { FIXTURE_URLS } from '@tests/shared/fixture-urls.js' import { FIXTURE_URLS } from '@tests/shared/fixture-urls.js'
import { checkCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js' import { checkAutoCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js'
describe('Test video transcription', function () { describe('Test video transcription', function () {
let servers: PeerTubeServer[] let servers: PeerTubeServer[]
@ -48,7 +48,7 @@ describe('Test video transcription', function () {
await waitJobs(servers) await waitJobs(servers)
await checkLanguage(servers, uuid, 'en') await checkLanguage(servers, uuid, 'en')
await checkCaption(servers, uuid) await checkAutoCaption(servers, uuid)
}) })
it('Should run transcription on upload by default', async function () { it('Should run transcription on upload by default', async function () {
@ -57,7 +57,7 @@ describe('Test video transcription', function () {
const uuid = await uploadForTranscription(servers[0]) const uuid = await uploadForTranscription(servers[0])
await waitJobs(servers) await waitJobs(servers)
await checkCaption(servers, uuid) await checkAutoCaption(servers, uuid)
await checkLanguage(servers, uuid, 'en') await checkLanguage(servers, uuid, 'en')
}) })
@ -73,7 +73,7 @@ describe('Test video transcription', function () {
}) })
await waitJobs(servers) await waitJobs(servers)
await checkCaption(servers, video.uuid) await checkAutoCaption(servers, video.uuid)
await checkLanguage(servers, video.uuid, 'en') await checkLanguage(servers, video.uuid, 'en')
}) })
@ -96,7 +96,7 @@ describe('Test video transcription', function () {
await servers[0].live.waitUntilReplacedByReplay({ videoId: video.id }) await servers[0].live.waitUntilReplacedByReplay({ videoId: video.id })
await waitJobs(servers) await waitJobs(servers)
await checkCaption(servers, video.uuid, 'WEBVTT\n\n00:') await checkAutoCaption(servers, video.uuid, 'WEBVTT\n\n00:')
await checkLanguage(servers, video.uuid, 'en') await checkLanguage(servers, video.uuid, 'en')
await servers[0].config.enableLive({ allowReplay: false }) await servers[0].config.enableLive({ allowReplay: false })

View File

@ -13,7 +13,7 @@ import {
} from '@peertube/peertube-server-commands' } from '@peertube/peertube-server-commands'
import { checkPeerTubeRunnerCacheIsEmpty } from '@tests/shared/directories.js' import { checkPeerTubeRunnerCacheIsEmpty } from '@tests/shared/directories.js'
import { PeerTubeRunnerProcess } from '@tests/shared/peertube-runner-process.js' import { PeerTubeRunnerProcess } from '@tests/shared/peertube-runner-process.js'
import { checkCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js' import { checkAutoCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js'
describe('Test transcription in peertube-runner program', function () { describe('Test transcription in peertube-runner program', function () {
let servers: PeerTubeServer[] = [] let servers: PeerTubeServer[] = []
@ -46,7 +46,7 @@ describe('Test transcription in peertube-runner program', function () {
const uuid = await uploadForTranscription(servers[0]) const uuid = await uploadForTranscription(servers[0])
await waitJobs(servers, { runnerJobs: true }) await waitJobs(servers, { runnerJobs: true })
await checkCaption(servers, uuid) await checkAutoCaption(servers, uuid)
await checkLanguage(servers, uuid, 'en') await checkLanguage(servers, uuid, 'en')
}) })

View File

@ -28,7 +28,7 @@ export function getCustomModelPath (modelName: CustomModelName) {
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
export async function checkCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') { export async function checkAutoCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') {
for (const server of servers) { for (const server of servers) {
const body = await server.captions.list({ videoId: uuid }) const body = await server.captions.list({ videoId: uuid })
expect(body.total).to.equal(1) expect(body.total).to.equal(1)
@ -37,6 +37,7 @@ export async function checkCaption (servers: PeerTubeServer[], uuid: string, cap
const caption = body.data[0] const caption = body.data[0]
expect(caption.language.id).to.equal('en') expect(caption.language.id).to.equal('en')
expect(caption.language.label).to.equal('English') expect(caption.language.label).to.equal('English')
expect(caption.automaticallyGenerated).to.be.true
{ {
await testCaptionFile(server.url, caption.captionPath, captionContains) await testCaptionFile(server.url, caption.captionPath, captionContains)

View File

@ -81,7 +81,12 @@ async function createVideoCaption (req: express.Request, res: express.Response)
const captionLanguage = req.params.captionLanguage const captionLanguage = req.params.captionLanguage
const videoCaption = await createLocalCaption({ video, language: captionLanguage, path: videoCaptionPhysicalFile.path }) const videoCaption = await createLocalCaption({
video,
language: captionLanguage,
path: videoCaptionPhysicalFile.path,
automaticallyGenerated: false
})
await sequelizeTypescript.transaction(async t => { await sequelizeTypescript.transaction(async t => {
await federateVideoIfNeeded(video, false, t) await federateVideoIfNeeded(video, false, t)

View File

@ -73,6 +73,7 @@ const contextStore: { [ id in ContextType ]: (string | { [ id: string ]: string
category: 'sc:category', category: 'sc:category',
licence: 'sc:license', licence: 'sc:license',
subtitleLanguage: 'sc:subtitleLanguage', subtitleLanguage: 'sc:subtitleLanguage',
automaticallyGenerated: 'pt:automaticallyGenerated',
sensitive: 'as:sensitive', sensitive: 'as:sensitive',
language: 'sc:inLanguage', language: 'sc:inLanguage',
identifier: 'sc:identifier', identifier: 'sc:identifier',

View File

@ -47,7 +47,7 @@ import { cpus } from 'os'
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
const LAST_MIGRATION_VERSION = 855 const LAST_MIGRATION_VERSION = 860
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@ -0,0 +1,31 @@
import * as Sequelize from 'sequelize'
/**
 * Migration: add the boolean `automaticallyGenerated` column to the
 * `videoCaption` table.
 *
 * The column is first added as NOT NULL with a default of `false`, then the
 * column definition is changed so that its default is `null` while staying
 * NOT NULL. Presumably the temporary default is there so that already existing
 * rows receive `false` before the default is removed (to be confirmed against
 * the database behaviour).
 *
 * @param utils - migration helpers; both statements run inside `utils.transaction`
 */
async function up (utils: {
  transaction: Sequelize.Transaction
  queryInterface: Sequelize.QueryInterface
  sequelize: Sequelize.Sequelize
}): Promise<void> {
  const { queryInterface, transaction } = utils

  const tableName = 'videoCaption'
  const columnName = 'automaticallyGenerated'

  // Step 1: create the column with a `false` default
  const withDefault = {
    type: Sequelize.BOOLEAN,
    defaultValue: false,
    allowNull: false
  }
  await queryInterface.addColumn(tableName, columnName, withDefault, { transaction })

  // Step 2: reset the default to null, the column remains NOT NULL
  const withoutDefault = {
    type: Sequelize.BOOLEAN,
    defaultValue: null,
    allowNull: false
  }
  await queryInterface.changeColumn(tableName, columnName, withoutDefault, { transaction })
}
/**
 * Rollback of this migration is not supported.
 *
 * @param options - unused; typed as `unknown` instead of an implicit `any`
 * @throws Error always, with the message 'Not implemented.'
 */
function down (options: unknown): void {
  throw new Error('Not implemented.')
}
export {
down, up
}

View File

@ -155,6 +155,7 @@ export function getCaptionAttributesFromObject (video: MVideoId, videoObject: Vi
videoId: video.id, videoId: video.id,
filename: VideoCaptionModel.generateCaptionName(c.identifier), filename: VideoCaptionModel.generateCaptionName(c.identifier),
language: c.identifier, language: c.identifier,
automaticallyGenerated: c.automaticallyGenerated === true,
fileUrl: c.url fileUrl: c.url
})) }))
} }

View File

@ -198,6 +198,7 @@ export class VideosExporter extends AbstractUserExporter <VideoExportJSON> {
updatedAt: c.updatedAt.toISOString(), updatedAt: c.updatedAt.toISOString(),
language: c.language, language: c.language,
filename: c.filename, filename: c.filename,
automaticallyGenerated: c.automaticallyGenerated,
fileUrl: c.getFileUrl(video) fileUrl: c.getFileUrl(video)
})) }))
} }

View File

@ -97,6 +97,7 @@ export class VideosImporter extends AbstractUserImporter <VideoExportJSON, Impor
if (!isArray(o.chapters)) o.chapters = [] if (!isArray(o.chapters)) o.chapters = []
o.tags = o.tags.filter(t => isVideoTagValid(t)) o.tags = o.tags.filter(t => isVideoTagValid(t))
o.captions = o.captions.filter(c => isVideoCaptionLanguageValid(c.language)) o.captions = o.captions.filter(c => isVideoCaptionLanguageValid(c.language))
o.chapters = o.chapters.filter(c => isVideoChapterTimecodeValid(c.timecode) && isVideoChapterTitleValid(c.title)) o.chapters = o.chapters.filter(c => isVideoChapterTimecodeValid(c.timecode) && isVideoChapterTitleValid(c.title))
@ -269,7 +270,12 @@ export class VideosImporter extends AbstractUserImporter <VideoExportJSON, Impor
if (!await this.isFileValidOrLog(absoluteFilePath, CONSTRAINTS_FIELDS.VIDEO_CAPTIONS.CAPTION_FILE.FILE_SIZE.max)) continue if (!await this.isFileValidOrLog(absoluteFilePath, CONSTRAINTS_FIELDS.VIDEO_CAPTIONS.CAPTION_FILE.FILE_SIZE.max)) continue
await createLocalCaption({ video, language: captionImport.language, path: absoluteFilePath }) await createLocalCaption({
video,
language: captionImport.language,
path: absoluteFilePath,
automaticallyGenerated: captionImport.automaticallyGenerated === true
})
captionPaths.push(absoluteFilePath) captionPaths.push(absoluteFilePath)
} }

View File

@ -25,13 +25,15 @@ export async function createLocalCaption (options: {
video: MVideo video: MVideo
path: string path: string
language: string language: string
automaticallyGenerated: boolean
}) { }) {
const { language, path, video } = options const { language, path, video, automaticallyGenerated } = options
const videoCaption = new VideoCaptionModel({ const videoCaption = new VideoCaptionModel({
videoId: video.id, videoId: video.id,
filename: VideoCaptionModel.generateCaptionName(language), filename: VideoCaptionModel.generateCaptionName(language),
language language,
automaticallyGenerated
}) as MVideoCaption }) as MVideoCaption
await moveAndProcessCaptionFile({ path }, videoCaption) await moveAndProcessCaptionFile({ path }, videoCaption)
@ -148,7 +150,8 @@ export async function onTranscriptionEnded (options: {
const caption = await createLocalCaption({ const caption = await createLocalCaption({
video, video,
language, language,
path: vttPath path: vttPath,
automaticallyGenerated: true
}) })
await sequelizeTypescript.transaction(async t => { await sequelizeTypescript.transaction(async t => {

View File

@ -317,7 +317,12 @@ async function processYoutubeSubtitles (youtubeDL: YoutubeDLWrapper, targetUrl:
continue continue
} }
await createLocalCaption({ language: subtitle.language, path: subtitle.path, video }) await createLocalCaption({
language: subtitle.language,
path: subtitle.path,
video,
automaticallyGenerated: false
})
logger.info('Added %s youtube-dl subtitle', subtitle.path) logger.info('Added %s youtube-dl subtitle', subtitle.path)
} }

View File

@ -1,3 +1,12 @@
import { VideoCaption, VideoCaptionObject } from '@peertube/peertube-models'
import { buildUUID } from '@peertube/peertube-node-utils'
import {
MVideo,
MVideoCaption,
MVideoCaptionFormattable,
MVideoCaptionLanguageUrl,
MVideoCaptionVideo
} from '@server/types/models/index.js'
import { remove } from 'fs-extra/esm' import { remove } from 'fs-extra/esm'
import { join } from 'path' import { join } from 'path'
import { Op, OrderItem, Transaction } from 'sequelize' import { Op, OrderItem, Transaction } from 'sequelize'
@ -13,15 +22,6 @@ import {
Table, Table,
UpdatedAt UpdatedAt
} from 'sequelize-typescript' } from 'sequelize-typescript'
import { ActivityIdentifierObject, VideoCaption } from '@peertube/peertube-models'
import {
MVideo,
MVideoCaption,
MVideoCaptionFormattable,
MVideoCaptionLanguageUrl,
MVideoCaptionVideo
} from '@server/types/models/index.js'
import { buildUUID } from '@peertube/peertube-node-utils'
import { isVideoCaptionLanguageValid } from '../../helpers/custom-validators/video-captions.js' import { isVideoCaptionLanguageValid } from '../../helpers/custom-validators/video-captions.js'
import { logger } from '../../helpers/logger.js' import { logger } from '../../helpers/logger.js'
import { CONFIG } from '../../initializers/config.js' import { CONFIG } from '../../initializers/config.js'
@ -81,6 +81,10 @@ export class VideoCaptionModel extends SequelizeModel<VideoCaptionModel> {
@Column(DataType.STRING(CONSTRAINTS_FIELDS.COMMONS.URL.max)) @Column(DataType.STRING(CONSTRAINTS_FIELDS.COMMONS.URL.max))
fileUrl: string fileUrl: string
@AllowNull(false)
@Column
automaticallyGenerated: boolean
@ForeignKey(() => VideoModel) @ForeignKey(() => VideoModel)
@Column @Column
videoId: number videoId: number
@ -228,15 +232,17 @@ export class VideoCaptionModel extends SequelizeModel<VideoCaptionModel> {
id: this.language, id: this.language,
label: VideoCaptionModel.getLanguageLabel(this.language) label: VideoCaptionModel.getLanguageLabel(this.language)
}, },
automaticallyGenerated: this.automaticallyGenerated,
captionPath: this.getCaptionStaticPath(), captionPath: this.getCaptionStaticPath(),
updatedAt: this.updatedAt.toISOString() updatedAt: this.updatedAt.toISOString()
} }
} }
toActivityPubObject (this: MVideoCaptionLanguageUrl, video: MVideo): ActivityIdentifierObject { toActivityPubObject (this: MVideoCaptionLanguageUrl, video: MVideo): VideoCaptionObject {
return { return {
identifier: this.language, identifier: this.language,
name: VideoCaptionModel.getLanguageLabel(this.language), name: VideoCaptionModel.getLanguageLabel(this.language),
automaticallyGenerated: this.automaticallyGenerated,
url: this.getFileUrl(video) url: this.getFileUrl(video)
} }
} }

View File

@ -1886,7 +1886,7 @@ export class VideoModel extends SequelizeModel<VideoModel> {
if (isArray(videoAP.VideoCaptions)) return videoAP.VideoCaptions if (isArray(videoAP.VideoCaptions)) return videoAP.VideoCaptions
return this.$get('VideoCaptions', { return this.$get('VideoCaptions', {
attributes: [ 'filename', 'language', 'fileUrl' ], attributes: [ 'filename', 'language', 'fileUrl', 'automaticallyGenerated' ],
transaction transaction
}) as Promise<MVideoCaptionLanguageUrl[]> }) as Promise<MVideoCaptionLanguageUrl[]>
} }

View File

@ -12,7 +12,8 @@ export type MVideoCaption = Omit<VideoCaptionModel, 'Video'>
export type MVideoCaptionLanguage = Pick<MVideoCaption, 'language'> export type MVideoCaptionLanguage = Pick<MVideoCaption, 'language'>
export type MVideoCaptionLanguageUrl = export type MVideoCaptionLanguageUrl =
Pick<MVideoCaption, 'language' | 'fileUrl' | 'filename' | 'getFileUrl' | 'getCaptionStaticPath' | 'toActivityPubObject'> Pick<MVideoCaption, 'language' | 'fileUrl' | 'filename' | 'automaticallyGenerated' | 'getFileUrl' | 'getCaptionStaticPath' |
'toActivityPubObject'>
export type MVideoCaptionVideo = export type MVideoCaptionVideo =
MVideoCaption & MVideoCaption &