Add metadata indicating whether a caption was automatically generated

This commit is contained in:
Chocobozzz 2024-06-27 15:29:26 +02:00
parent 1bfb791e05
commit fd4831e502
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
22 changed files with 101 additions and 29 deletions

View File

@ -4,6 +4,7 @@ export * from './cache-file-object.js'
export * from './common-objects.js'
export * from './playlist-element-object.js'
export * from './playlist-object.js'
export * from './video-caption-object.js'
export * from './video-chapters-object.js'
export * from './video-comment-object.js'
export * from './video-object.js'

View File

@ -0,0 +1,5 @@
import { ActivityIdentifierObject } from './common-objects.js'
/**
 * ActivityPub object describing a video caption: a regular identifier object
 * extended with a flag telling whether the caption was automatically generated.
 */
export interface VideoCaptionObject extends ActivityIdentifierObject {
/** `true` when the caption was automatically generated */
automaticallyGenerated: boolean
}

View File

@ -6,6 +6,7 @@ import {
ActivityTagObject,
ActivityUrlObject
} from './common-objects.js'
import { VideoCaptionObject } from './video-caption-object.js'
import { VideoChapterObject } from './video-chapters-object.js'
export interface VideoObject {
@ -18,7 +19,7 @@ export interface VideoObject {
category: ActivityIdentifierObject
licence: ActivityIdentifierObject
language: ActivityIdentifierObject
subtitleLanguage: ActivityIdentifierObject[]
subtitleLanguage: VideoCaptionObject[]
views: number

View File

@ -73,6 +73,7 @@ export interface VideoExportJSON {
language: string
filename: string
fileUrl: string
automaticallyGenerated: boolean
}[]
chapters: {

View File

@ -3,5 +3,6 @@ import { VideoConstant } from '../video-constant.model.js'
/**
 * Formatted (JSON) representation of a video caption.
 */
export interface VideoCaption {
/** Caption language */
language: VideoConstant<string>
/** Path of the caption file */
captionPath: string
/** `true` when the caption was automatically generated */
automaticallyGenerated: boolean
/** Last update date, serialized as a string */
updatedAt: string
}

View File

@ -7,7 +7,7 @@
"scripts": {
"build": "tsc",
"watch": "tsc -w",
"install-dependencies:transcription": "pip install -r ./requirements.txt ../transcription-devtools/requirements.txt"
"install-dependencies:transcription": "pip install -r ./requirements.txt -r ../transcription-devtools/requirements.txt"
},
"dependencies": {}
}

View File

@ -72,12 +72,14 @@ describe('Test video captions', function () {
expect(caption1.language.id).to.equal('ar')
expect(caption1.language.label).to.equal('Arabic')
expect(caption1.captionPath).to.match(new RegExp('^/lazy-static/video-captions/' + uuidRegex + '-ar.vtt$'))
expect(caption1.automaticallyGenerated).to.be.false
await testCaptionFile(server.url, caption1.captionPath, 'Subtitle good 1.')
// Second caption of the list: check its language, its static path, its flag and its file content
const caption2 = body.data[1]
expect(caption2.language.id).to.equal('zh')
expect(caption2.language.label).to.equal('Chinese')
expect(caption2.captionPath).to.match(new RegExp('^/lazy-static/video-captions/' + uuidRegex + '-zh.vtt$'))
// Assert on caption2 (not caption1) so the flag of the second caption is actually verified
expect(caption2.automaticallyGenerated).to.be.false
await testCaptionFile(server.url, caption2.captionPath, 'Subtitle good 2.')
}
})

View File

@ -13,7 +13,7 @@ import {
waitJobs
} from '@peertube/peertube-server-commands'
import { FIXTURE_URLS } from '@tests/shared/fixture-urls.js'
import { checkCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js'
import { checkAutoCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js'
describe('Test video transcription', function () {
let servers: PeerTubeServer[]
@ -48,7 +48,7 @@ describe('Test video transcription', function () {
await waitJobs(servers)
await checkLanguage(servers, uuid, 'en')
await checkCaption(servers, uuid)
await checkAutoCaption(servers, uuid)
})
it('Should run transcription on upload by default', async function () {
@ -57,7 +57,7 @@ describe('Test video transcription', function () {
const uuid = await uploadForTranscription(servers[0])
await waitJobs(servers)
await checkCaption(servers, uuid)
await checkAutoCaption(servers, uuid)
await checkLanguage(servers, uuid, 'en')
})
@ -73,7 +73,7 @@ describe('Test video transcription', function () {
})
await waitJobs(servers)
await checkCaption(servers, video.uuid)
await checkAutoCaption(servers, video.uuid)
await checkLanguage(servers, video.uuid, 'en')
})
@ -96,7 +96,7 @@ describe('Test video transcription', function () {
await servers[0].live.waitUntilReplacedByReplay({ videoId: video.id })
await waitJobs(servers)
await checkCaption(servers, video.uuid, 'WEBVTT\n\n00:')
await checkAutoCaption(servers, video.uuid, 'WEBVTT\n\n00:')
await checkLanguage(servers, video.uuid, 'en')
await servers[0].config.enableLive({ allowReplay: false })

View File

@ -13,7 +13,7 @@ import {
} from '@peertube/peertube-server-commands'
import { checkPeerTubeRunnerCacheIsEmpty } from '@tests/shared/directories.js'
import { PeerTubeRunnerProcess } from '@tests/shared/peertube-runner-process.js'
import { checkCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js'
import { checkAutoCaption, checkLanguage, checkNoCaption, uploadForTranscription } from '@tests/shared/transcription.js'
describe('Test transcription in peertube-runner program', function () {
let servers: PeerTubeServer[] = []
@ -46,7 +46,7 @@ describe('Test transcription in peertube-runner program', function () {
const uuid = await uploadForTranscription(servers[0])
await waitJobs(servers, { runnerJobs: true })
await checkCaption(servers, uuid)
await checkAutoCaption(servers, uuid)
await checkLanguage(servers, uuid, 'en')
})

View File

@ -28,7 +28,7 @@ export function getCustomModelPath (modelName: CustomModelName) {
// ---------------------------------------------------------------------------
export async function checkCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') {
export async function checkAutoCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') {
for (const server of servers) {
const body = await server.captions.list({ videoId: uuid })
expect(body.total).to.equal(1)
@ -37,6 +37,7 @@ export async function checkCaption (servers: PeerTubeServer[], uuid: string, cap
const caption = body.data[0]
expect(caption.language.id).to.equal('en')
expect(caption.language.label).to.equal('English')
expect(caption.automaticallyGenerated).to.be.true
{
await testCaptionFile(server.url, caption.captionPath, captionContains)

View File

@ -81,7 +81,12 @@ async function createVideoCaption (req: express.Request, res: express.Response)
const captionLanguage = req.params.captionLanguage
const videoCaption = await createLocalCaption({ video, language: captionLanguage, path: videoCaptionPhysicalFile.path })
const videoCaption = await createLocalCaption({
video,
language: captionLanguage,
path: videoCaptionPhysicalFile.path,
automaticallyGenerated: false
})
await sequelizeTypescript.transaction(async t => {
await federateVideoIfNeeded(video, false, t)

View File

@ -73,6 +73,7 @@ const contextStore: { [ id in ContextType ]: (string | { [ id: string ]: string
category: 'sc:category',
licence: 'sc:license',
subtitleLanguage: 'sc:subtitleLanguage',
automaticallyGenerated: 'pt:automaticallyGenerated',
sensitive: 'as:sensitive',
language: 'sc:inLanguage',
identifier: 'sc:identifier',

View File

@ -47,7 +47,7 @@ import { cpus } from 'os'
// ---------------------------------------------------------------------------
const LAST_MIGRATION_VERSION = 855
const LAST_MIGRATION_VERSION = 860
// ---------------------------------------------------------------------------

View File

@ -0,0 +1,31 @@
import * as Sequelize from 'sequelize'
/**
 * Migration: adds the non-nullable boolean `automaticallyGenerated` column
 * to the `videoCaption` table.
 *
 * The column is created with a `false` default, then the column definition is
 * changed so that its default becomes `null`. Both operations run inside the
 * provided transaction.
 *
 * @param utils - migration helpers (transaction, query interface, sequelize instance)
 */
async function up (utils: {
  transaction: Sequelize.Transaction
  queryInterface: Sequelize.QueryInterface
  sequelize: Sequelize.Sequelize
}): Promise<void> {
  const { transaction, queryInterface } = utils

  const tableName = 'videoCaption'
  const columnName = 'automaticallyGenerated'

  // Same column definition for both steps, only the default value differs
  const buildColumn = (defaultValue: boolean | null) => ({
    type: Sequelize.BOOLEAN,
    defaultValue,
    allowNull: false
  })

  // Step 1: create the column with a `false` default
  // (presumably so that already existing rows satisfy the NOT NULL constraint)
  await queryInterface.addColumn(tableName, columnName, buildColumn(false), { transaction })

  // Step 2: reset the default to `null` while keeping the column non-nullable
  await queryInterface.changeColumn(tableName, columnName, buildColumn(null), { transaction })
}
/**
 * Rollback of this migration is not supported.
 *
 * @throws Error always, with the message 'Not implemented.'
 */
function down (options) {
  const notImplemented = new Error('Not implemented.')

  throw notImplemented
}
export {
down, up
}

View File

@ -155,6 +155,7 @@ export function getCaptionAttributesFromObject (video: MVideoId, videoObject: Vi
videoId: video.id,
filename: VideoCaptionModel.generateCaptionName(c.identifier),
language: c.identifier,
automaticallyGenerated: c.automaticallyGenerated === true,
fileUrl: c.url
}))
}

View File

@ -198,6 +198,7 @@ export class VideosExporter extends AbstractUserExporter <VideoExportJSON> {
updatedAt: c.updatedAt.toISOString(),
language: c.language,
filename: c.filename,
automaticallyGenerated: c.automaticallyGenerated,
fileUrl: c.getFileUrl(video)
}))
}

View File

@ -97,6 +97,7 @@ export class VideosImporter extends AbstractUserImporter <VideoExportJSON, Impor
if (!isArray(o.chapters)) o.chapters = []
o.tags = o.tags.filter(t => isVideoTagValid(t))
o.captions = o.captions.filter(c => isVideoCaptionLanguageValid(c.language))
o.chapters = o.chapters.filter(c => isVideoChapterTimecodeValid(c.timecode) && isVideoChapterTitleValid(c.title))
@ -269,7 +270,12 @@ export class VideosImporter extends AbstractUserImporter <VideoExportJSON, Impor
if (!await this.isFileValidOrLog(absoluteFilePath, CONSTRAINTS_FIELDS.VIDEO_CAPTIONS.CAPTION_FILE.FILE_SIZE.max)) continue
await createLocalCaption({ video, language: captionImport.language, path: absoluteFilePath })
await createLocalCaption({
video,
language: captionImport.language,
path: absoluteFilePath,
automaticallyGenerated: captionImport.automaticallyGenerated === true
})
captionPaths.push(absoluteFilePath)
}

View File

@ -25,13 +25,15 @@ export async function createLocalCaption (options: {
video: MVideo
path: string
language: string
automaticallyGenerated: boolean
}) {
const { language, path, video } = options
const { language, path, video, automaticallyGenerated } = options
const videoCaption = new VideoCaptionModel({
videoId: video.id,
filename: VideoCaptionModel.generateCaptionName(language),
language
language,
automaticallyGenerated
}) as MVideoCaption
await moveAndProcessCaptionFile({ path }, videoCaption)
@ -148,7 +150,8 @@ export async function onTranscriptionEnded (options: {
const caption = await createLocalCaption({
video,
language,
path: vttPath
path: vttPath,
automaticallyGenerated: true
})
await sequelizeTypescript.transaction(async t => {

View File

@ -317,7 +317,12 @@ async function processYoutubeSubtitles (youtubeDL: YoutubeDLWrapper, targetUrl:
continue
}
await createLocalCaption({ language: subtitle.language, path: subtitle.path, video })
await createLocalCaption({
language: subtitle.language,
path: subtitle.path,
video,
automaticallyGenerated: false
})
logger.info('Added %s youtube-dl subtitle', subtitle.path)
}

View File

@ -1,3 +1,12 @@
import { VideoCaption, VideoCaptionObject } from '@peertube/peertube-models'
import { buildUUID } from '@peertube/peertube-node-utils'
import {
MVideo,
MVideoCaption,
MVideoCaptionFormattable,
MVideoCaptionLanguageUrl,
MVideoCaptionVideo
} from '@server/types/models/index.js'
import { remove } from 'fs-extra/esm'
import { join } from 'path'
import { Op, OrderItem, Transaction } from 'sequelize'
@ -13,15 +22,6 @@ import {
Table,
UpdatedAt
} from 'sequelize-typescript'
import { ActivityIdentifierObject, VideoCaption } from '@peertube/peertube-models'
import {
MVideo,
MVideoCaption,
MVideoCaptionFormattable,
MVideoCaptionLanguageUrl,
MVideoCaptionVideo
} from '@server/types/models/index.js'
import { buildUUID } from '@peertube/peertube-node-utils'
import { isVideoCaptionLanguageValid } from '../../helpers/custom-validators/video-captions.js'
import { logger } from '../../helpers/logger.js'
import { CONFIG } from '../../initializers/config.js'
@ -81,6 +81,10 @@ export class VideoCaptionModel extends SequelizeModel<VideoCaptionModel> {
@Column(DataType.STRING(CONSTRAINTS_FIELDS.COMMONS.URL.max))
fileUrl: string
@AllowNull(false)
@Column
automaticallyGenerated: boolean
@ForeignKey(() => VideoModel)
@Column
videoId: number
@ -228,15 +232,17 @@ export class VideoCaptionModel extends SequelizeModel<VideoCaptionModel> {
id: this.language,
label: VideoCaptionModel.getLanguageLabel(this.language)
},
automaticallyGenerated: this.automaticallyGenerated,
captionPath: this.getCaptionStaticPath(),
updatedAt: this.updatedAt.toISOString()
}
}
toActivityPubObject (this: MVideoCaptionLanguageUrl, video: MVideo): ActivityIdentifierObject {
/**
 * Builds the ActivityPub object describing this caption.
 *
 * @param video - video passed to `getFileUrl()` to build the caption file URL
 * @returns the caption language as identifier, its label as name,
 * the `automaticallyGenerated` flag and the caption file URL
 */
toActivityPubObject (this: MVideoCaptionLanguageUrl, video: MVideo): VideoCaptionObject {
  const identifier = this.language
  const name = VideoCaptionModel.getLanguageLabel(identifier)
  const automaticallyGenerated = this.automaticallyGenerated
  const url = this.getFileUrl(video)

  return { identifier, name, automaticallyGenerated, url }
}

View File

@ -1886,7 +1886,7 @@ export class VideoModel extends SequelizeModel<VideoModel> {
if (isArray(videoAP.VideoCaptions)) return videoAP.VideoCaptions
return this.$get('VideoCaptions', {
attributes: [ 'filename', 'language', 'fileUrl' ],
attributes: [ 'filename', 'language', 'fileUrl', 'automaticallyGenerated' ],
transaction
}) as Promise<MVideoCaptionLanguageUrl[]>
}

View File

@ -12,7 +12,8 @@ export type MVideoCaption = Omit<VideoCaptionModel, 'Video'>
export type MVideoCaptionLanguage = Pick<MVideoCaption, 'language'>
export type MVideoCaptionLanguageUrl =
Pick<MVideoCaption, 'language' | 'fileUrl' | 'filename' | 'getFileUrl' | 'getCaptionStaticPath' | 'toActivityPubObject'>
Pick<MVideoCaption, 'language' | 'fileUrl' | 'filename' | 'automaticallyGenerated' | 'getFileUrl' | 'getCaptionStaticPath' |
'toActivityPubObject'>
export type MVideoCaptionVideo =
MVideoCaption &