Fix plaintext markdown converter

This commit is contained in:
Chocobozzz 2022-02-04 10:31:54 +01:00
parent 457c83486e
commit c68e2b2d22
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
7 changed files with 104 additions and 27 deletions

View File

@ -1,6 +1,6 @@
import { Injectable } from '@angular/core' import { Injectable } from '@angular/core'
import { getCustomMarkupSanitizeOptions, getDefaultSanitizeOptions } from '@shared/core-utils/renderer/html'
import { LinkifierService } from './linkifier.service' import { LinkifierService } from './linkifier.service'
import { getCustomMarkupSanitizeOptions, getSanitizeOptions } from '@shared/core-utils/renderer/html'
@Injectable() @Injectable()
export class HtmlRendererService { export class HtmlRendererService {
@ -30,7 +30,7 @@ export class HtmlRendererService {
const options = additionalAllowedTags.length !== 0 const options = additionalAllowedTags.length !== 0
? getCustomMarkupSanitizeOptions(additionalAllowedTags) ? getCustomMarkupSanitizeOptions(additionalAllowedTags)
: getSanitizeOptions() : getDefaultSanitizeOptions()
return this.sanitizeHtml(html, options) return this.sanitizeHtml(html, options)
} }

View File

@ -1,6 +1,6 @@
import express from 'express' import express from 'express'
import Feed from 'pfeed' import Feed from 'pfeed'
import { mdToPlainText, toSafeHtml } from '@server/helpers/markdown' import { mdToOneLinePlainText, toSafeHtml } from '@server/helpers/markdown'
import { getServerActor } from '@server/models/application/application' import { getServerActor } from '@server/models/application/application'
import { getCategoryLabel } from '@server/models/video/formatter/video-format-utils' import { getCategoryLabel } from '@server/models/video/formatter/video-format-utils'
import { VideoInclude } from '@shared/models' import { VideoInclude } from '@shared/models'
@ -236,7 +236,7 @@ function initFeed (parameters: {
return new Feed({ return new Feed({
title: name, title: name,
description: mdToPlainText(description), description: mdToOneLinePlainText(description),
// updated: TODO: somehowGetLatestUpdate, // optional, default = today // updated: TODO: somehowGetLatestUpdate, // optional, default = today
id: webserverUrl, id: webserverUrl,
link: webserverUrl, link: webserverUrl,
@ -299,7 +299,7 @@ function addVideosToFeed (feed, videos: VideoModel[]) {
title: video.name, title: video.name,
id: video.url, id: video.url,
link: WEBSERVER.URL + video.getWatchStaticPath(), link: WEBSERVER.URL + video.getWatchStaticPath(),
description: mdToPlainText(video.getTruncatedDescription()), description: mdToOneLinePlainText(video.getTruncatedDescription()),
content: toSafeHtml(video.description), content: toSafeHtml(video.description),
author: [ author: [
{ {

View File

@ -1,14 +1,14 @@
import { getSanitizeOptions, TEXT_WITH_HTML_RULES } from '@shared/core-utils' import { getDefaultSanitizeOptions, getTextOnlySanitizeOptions, TEXT_WITH_HTML_RULES } from '@shared/core-utils'
const sanitizeOptions = getSanitizeOptions() const defaultSanitizeOptions = getDefaultSanitizeOptions()
const textOnlySanitizeOptions = getTextOnlySanitizeOptions()
const sanitizeHtml = require('sanitize-html') const sanitizeHtml = require('sanitize-html')
const markdownItEmoji = require('markdown-it-emoji/light') const markdownItEmoji = require('markdown-it-emoji/light')
const MarkdownItClass = require('markdown-it') const MarkdownItClass = require('markdown-it')
const markdownIt = new MarkdownItClass('default', { linkify: true, breaks: true, html: true })
markdownIt.enable(TEXT_WITH_HTML_RULES) const markdownItWithHTML = new MarkdownItClass('default', { linkify: true, breaks: true, html: true })
markdownIt.use(markdownItEmoji) const markdownItWithoutHTML = new MarkdownItClass('default', { linkify: true, breaks: true, html: false })
const toSafeHtml = (text: string) => { const toSafeHtml = (text: string) => {
if (!text) return '' if (!text) return ''
@ -17,29 +17,65 @@ const toSafeHtml = (text: string) => {
const textWithLineFeed = text.replace(/<br.?\/?>/g, '\r\n') const textWithLineFeed = text.replace(/<br.?\/?>/g, '\r\n')
// Convert possible markdown (emojis, emphasis and lists) to html // Convert possible markdown (emojis, emphasis and lists) to html
const html = markdownIt.render(textWithLineFeed) const html = markdownItWithHTML.enable(TEXT_WITH_HTML_RULES)
.use(markdownItEmoji)
.render(textWithLineFeed)
// Convert to safe Html // Convert to safe Html
return sanitizeHtml(html, sanitizeOptions) return sanitizeHtml(html, defaultSanitizeOptions)
} }
const mdToPlainText = (text: string) => { const mdToOneLinePlainText = (text: string) => {
if (!text) return '' if (!text) return ''
// Convert possible markdown (emojis, emphasis and lists) to html markdownItWithoutHTML.use(markdownItEmoji)
const html = markdownIt.render(text) .use(plainTextPlugin)
.render(text)
// Convert to safe Html // Convert to safe Html
const safeHtml = sanitizeHtml(html, sanitizeOptions) return sanitizeHtml(markdownItWithoutHTML.plainText, textOnlySanitizeOptions)
return safeHtml.replace(/<[^>]+>/g, '')
.replace(/\n$/, '')
.replace(/\n/g, ', ')
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
export { export {
toSafeHtml, toSafeHtml,
mdToPlainText mdToOneLinePlainText
}
// ---------------------------------------------------------------------------
// Thanks: https://github.com/wavesheep/markdown-it-plain-text
/**
 * markdown-it plugin that builds a one-line plain text version of the rendered
 * document and stores it on the markdown-it instance as `plainText`.
 *
 * It registers a core rule named `plainText` that walks the token stream:
 *  - the content of every token that has some is appended to the output
 *  - list items are joined with ', '
 *  - any other closing token and line breaks (`br` tag) are turned into a single space
 *  - runs of whitespace in the final text are collapsed into one space
 *
 * @param markdownIt the markdown-it instance to extend (receives the `plainText` property)
 */
function plainTextPlugin (markdownIt: any) {
  // Minimal view of the token fields read by this plugin
  interface PlainTextToken {
    type: string
    tag?: string
    content?: string
    children?: PlainTextToken[] | null
  }

  const closingTokenRegex = /[a-zA-Z]+_close/

  // Separator waiting to be written in front of the next piece of content
  let lastSeparator = ''

  function plainTextRule (state: any) {
    // Always start from a clean state: a separator left pending by a previous
    // render must not leak into this one
    lastSeparator = ''

    const text = scan(state.tokens)

    markdownIt.plainText = text.replace(/\s+/g, ' ')
  }

  function scan (tokens: PlainTextToken[]) {
    let text = ''

    for (const token of tokens) {
      // Container token (inline): only its children carry the content
      if (Array.isArray(token.children)) {
        text += scan(token.children)
        continue
      }

      if (token.type === 'list_item_close') {
        lastSeparator = ', '
      } else if (closingTokenRegex.test(token.type) || token.tag === 'br') {
        // Soft/hard line breaks have no content but must still separate words
        lastSeparator = ' '
      } else if (token.content) {
        text += lastSeparator + token.content

        // The separator has been consumed: do not repeat it before the next
        // content token of the same block
        lastSeparator = ''
      }
    }

    return text
  }

  markdownIt.core.ruler.push('plainText', plainTextRule)
}

View File

@ -12,7 +12,7 @@ import { HttpStatusCode } from '../../shared/models/http/http-error-codes'
import { VideoPlaylistPrivacy, VideoPrivacy } from '../../shared/models/videos' import { VideoPlaylistPrivacy, VideoPrivacy } from '../../shared/models/videos'
import { isTestInstance } from '../helpers/core-utils' import { isTestInstance } from '../helpers/core-utils'
import { logger } from '../helpers/logger' import { logger } from '../helpers/logger'
import { mdToPlainText } from '../helpers/markdown' import { mdToOneLinePlainText } from '../helpers/markdown'
import { CONFIG } from '../initializers/config' import { CONFIG } from '../initializers/config'
import { import {
ACCEPT_HEADERS, ACCEPT_HEADERS,
@ -103,7 +103,7 @@ class ClientHtml {
res.status(HttpStatusCode.NOT_FOUND_404) res.status(HttpStatusCode.NOT_FOUND_404)
return html return html
} }
const description = mdToPlainText(video.description) const description = mdToOneLinePlainText(video.description)
let customHtml = ClientHtml.addTitleTag(html, video.name) let customHtml = ClientHtml.addTitleTag(html, video.name)
customHtml = ClientHtml.addDescriptionTag(customHtml, description) customHtml = ClientHtml.addDescriptionTag(customHtml, description)
@ -164,7 +164,7 @@ class ClientHtml {
return html return html
} }
const description = mdToPlainText(videoPlaylist.description) const description = mdToOneLinePlainText(videoPlaylist.description)
let customHtml = ClientHtml.addTitleTag(html, videoPlaylist.name) let customHtml = ClientHtml.addTitleTag(html, videoPlaylist.name)
customHtml = ClientHtml.addDescriptionTag(customHtml, description) customHtml = ClientHtml.addDescriptionTag(customHtml, description)
@ -263,7 +263,7 @@ class ClientHtml {
return ClientHtml.getIndexHTML(req, res) return ClientHtml.getIndexHTML(req, res)
} }
const description = mdToPlainText(entity.description) const description = mdToOneLinePlainText(entity.description)
let customHtml = ClientHtml.addTitleTag(html, entity.getDisplayName()) let customHtml = ClientHtml.addTitleTag(html, entity.getDisplayName())
customHtml = ClientHtml.addDescriptionTag(customHtml, description) customHtml = ClientHtml.addDescriptionTag(customHtml, description)

View File

@ -1,4 +1,5 @@
import './image' import './image'
import './core-utils' import './core-utils'
import './comment-model' import './comment-model'
import './markdown'
import './request' import './request'

View File

@ -0,0 +1,34 @@
/* eslint-disable @typescript-eslint/no-unused-expressions,@typescript-eslint/require-await */
import 'mocha'
import { mdToOneLinePlainText } from '@server/helpers/markdown'
import { expect } from 'chai'
// Unit tests for the helpers exported by '@server/helpers/markdown'
describe('Markdown helpers', function () {
// mdToOneLinePlainText(): markdown input, single line of plain text output
describe('Plain text', function () {
it('Should convert a list to plain text', function () {
// Three bullet items on separate lines: expected to be joined with ', '
const result = mdToOneLinePlainText(`* list 1
* list 2
* list 3`)
expect(result).to.equal('list 1, list 2, list 3')
})
it('Should convert a list with indentation to plain text', function () {
// Text line followed by a bullet list: the text and the first item are
// expected to be separated by a single space, the items by ', '
// NOTE(review): the test title mentions indentation, make sure the list lines
// inside the template literal are actually indented in the real file
const result = mdToOneLinePlainText(`Hello:
* list 1
* list 2
* list 3`)
expect(result).to.equal('Hello: list 1, list 2, list 3')
})
it('Should convert HTML to plain text', function () {
// Markdown emphasis and raw HTML tags must both be removed from the output
const result = mdToOneLinePlainText(`**Hello** <strong>coucou</strong>`)
expect(result).to.equal('Hello coucou')
})
})
})

View File

@ -1,4 +1,4 @@
export function getSanitizeOptions () { export function getDefaultSanitizeOptions () {
return { return {
allowedTags: [ 'a', 'p', 'span', 'br', 'strong', 'em', 'ul', 'ol', 'li' ], allowedTags: [ 'a', 'p', 'span', 'br', 'strong', 'em', 'ul', 'ol', 'li' ],
allowedSchemes: [ 'http', 'https' ], allowedSchemes: [ 'http', 'https' ],
@ -23,8 +23,14 @@ export function getSanitizeOptions () {
} }
} }
/**
 * Sanitize options that do not allow any HTML tag.
 *
 * @returns a new options object whose `allowedTags` list is empty
 */
export function getTextOnlySanitizeOptions () {
  // No tag is whitelisted
  const allowedTags: string[] = []

  return { allowedTags }
}
export function getCustomMarkupSanitizeOptions (additionalAllowedTags: string[] = []) { export function getCustomMarkupSanitizeOptions (additionalAllowedTags: string[] = []) {
const base = getSanitizeOptions() const base = getDefaultSanitizeOptions()
return { return {
allowedTags: [ allowedTags: [