Merge branch 'webcodec-h264' of https://github.com/any1/noVNC

This commit is contained in:
Pierre Ossman 2024-08-29 16:59:25 +02:00
commit 047531e886
6 changed files with 663 additions and 4 deletions

321
core/decoders/h264.js Normal file
View File

@ -0,0 +1,321 @@
/*
* noVNC: HTML5 VNC client
* Copyright (C) 2024 The noVNC Authors
* Licensed under MPL 2.0 (see LICENSE.txt)
*
* See README.md for usage and integration instructions.
*
*/
import * as Log from '../util/logging.js';
/*
 * Incremental parser for an Annex B H.264 byte stream.
 *
 * parse() yields one access unit at a time. SPS parameters
 * (profile/constraint/level) are captured as a side effect so the
 * caller can configure a WebCodecs decoder.
 */
export class H264Parser {
    constructor(data) {
        this._data = data;
        this._index = 0;

        this.profileIdc = null;
        this.constraintSet = null;
        this.levelIdc = null;
    }

    // Length of the Annex B start code at `index`: 4, 3, or 0 if none.
    _getStartSequenceLen(index) {
        const d = this._data;
        if (d[index] === 0 && d[index + 1] === 0) {
            if (d[index + 2] === 1) {
                return 3;
            }
            if (d[index + 2] === 0 && d[index + 3] === 1) {
                return 4;
            }
        }
        return 0;
    }

    // Index of the next start code at or after `index`, or -1 if the
    // stream ends first.
    _indexOfNextNalUnit(index) {
        const end = this._data.length;
        for (let i = index; i < end; ++i) {
            if (this._getStartSequenceLen(i) !== 0) {
                return i;
            }
        }
        return -1;
    }

    // Record profile, constraint flags and level from the first three
    // payload bytes of an SPS NAL unit.
    _parseSps(index) {
        [this.profileIdc, this.constraintSet, this.levelIdc] =
            [this._data[index], this._data[index + 1], this._data[index + 2]];
    }

    // Classify the NAL unit whose header byte is at `index`.
    _parseNalUnit(index) {
        const header = this._data[index];
        if (header & 0x80) {
            throw new Error('H264 parsing sanity check failed, forbidden zero bit is set');
        }

        const unitType = header & 0x1f;
        if (unitType === 1) {          // coded slice, non-IDR
            return { slice: true };
        }
        if (unitType === 5) {          // coded slice, IDR
            return { slice: true, key: true };
        }
        if (unitType === 7) {          // SPS
            this._parseSps(index + 1);
            return {};
        }
        if (unitType !== 6 && unitType !== 8) {  // 6 = SEI, 8 = PPS: ignored
            Log.Warn("Unhandled unit type: ", unitType);
        }
        return {};
    }

    /*
     * Extract the next frame from the stream.
     *
     * Returns { frame, key } where `frame` is a subarray of the input
     * covering every NAL unit up to and including a coded slice, or
     * null when the stream is exhausted.
     */
    parse() {
        const begin = this._index;
        let isKey = false;

        while (this._index < this._data.length) {
            const scLen = this._getStartSequenceLen(this._index);
            if (scLen === 0) {
                throw new Error('Invalid start sequence in bit stream');
            }

            const unit = this._parseNalUnit(this._index + scLen);

            const next = this._indexOfNextNalUnit(this._index + scLen);
            this._index = (next === -1) ? this._data.length : next;

            if (unit.key) {
                isKey = true;
            }
            if (unit.slice) {
                break;
            }
        }

        if (begin === this._index) {
            return null;
        }

        return {
            frame: this._data.subarray(begin, this._index),
            key: isKey,
        };
    }
}
/*
 * Per-rectangle H.264 decoding state.
 *
 * Wraps a WebCodecs VideoDecoder and keeps track of the frames that
 * have been submitted but not yet delivered by the decoder.
 */
export class H264Context {
    constructor(width, height) {
        this.lastUsed = 0; // LRU tick, maintained by H264Decoder

        this._width = width;
        this._height = height;
        // Most recently seen SPS parameters; needed for the codec string
        this._profileIdc = null;
        this._constraintSet = null;
        this._levelIdc = null;
        this._decoder = null;
        this._pendingFrames = []; // submitted frames, oldest first
    }

    // VideoDecoder output callback: pair the decoded frame with the
    // oldest pending entry and resolve that entry's promise.
    _handleFrame(frame) {
        let pending = this._pendingFrames.shift();
        if (pending === undefined) {
            throw new Error("Pending frame queue empty when receiving frame from decoder");
        }
        if (pending.timestamp != frame.timestamp) {
            // The decoder is expected to deliver frames in submission order
            throw new Error("Video frame timestamp mismatch. Expected " +
                            pending.timestamp + " but got " + frame.timestamp);
        }
        pending.frame = frame;
        pending.ready = true;
        pending.resolve();
        if (!pending.keep) {
            // Frames that won't be displayed must be closed promptly to
            // release the decoder's backing resources
            frame.close();
        }
    }

    // VideoDecoder error callback
    _handleError(e) {
        throw new Error("Failed to decode frame: " + e.message);
    }

    // (Re)create and configure the decoder for the given SPS parameters
    _configureDecoder(profileIdc, constraintSet, levelIdc) {
        if (this._decoder === null || this._decoder.state === 'closed') {
            this._decoder = new VideoDecoder({
                output: frame => this._handleFrame(frame),
                error: e => this._handleError(e),
            });
        }
        // Codec string is avc1.PPCCLL: profile, constraint flags and
        // level as two hex digits each
        const codec = 'avc1.' +
            profileIdc.toString(16).padStart(2, '0') +
            constraintSet.toString(16).padStart(2, '0') +
            levelIdc.toString(16).padStart(2, '0');
        this._decoder.configure({
            codec: codec,
            codedWidth: this._width,
            codedHeight: this._height,
            optimizeForLatency: true,
        });
    }

    // Create and enqueue the bookkeeping entry for a frame that is
    // about to be submitted to the decoder
    _preparePendingFrame(timestamp) {
        let pending = {
            timestamp: timestamp,
            promise: null,
            resolve: null,
            frame: null,
            ready: false,
            keep: false, // set by decode() on the frame that will be shown
        };
        pending.promise = new Promise((resolve) => {
            pending.resolve = resolve;
        });
        this._pendingFrames.push(pending);
        return pending;
    }

    /*
     * Decode one rect payload (an Annex B stream that may contain
     * several frames). Returns the pending entry for the last frame
     * submitted, or null if nothing could be submitted.
     */
    decode(payload) {
        let parser = new H264Parser(payload);
        let result = null;

        // Ideally, this timestamp should come from the server, but we'll just
        // approximate it instead.
        let timestamp = Math.round(window.performance.now() * 1e3);
        while (true) {
            let encodedFrame = parser.parse();
            if (encodedFrame === null) {
                break;
            }

            // Remember SPS parameters for decoder (re)configuration.
            // (Fixed: these previously used `self`, which stored the
            // values on the global object instead of this context.)
            if (parser.profileIdc !== null) {
                this._profileIdc = parser.profileIdc;
                this._constraintSet = parser.constraintSet;
                this._levelIdc = parser.levelIdc;
            }

            if (this._decoder === null || this._decoder.state !== 'configured') {
                if (!encodedFrame.key) {
                    Log.Warn("Missing key frame. Can't decode until one arrives");
                    continue;
                }
                if (this._profileIdc === null) {
                    Log.Warn('Cannot config decoder. Have not received SPS and PPS yet.');
                    continue;
                }
                this._configureDecoder(this._profileIdc, this._constraintSet,
                                       this._levelIdc);
            }

            result = this._preparePendingFrame(timestamp);

            const chunk = new EncodedVideoChunk({
                timestamp: timestamp,
                type: encodedFrame.key ? 'key' : 'delta',
                data: encodedFrame.frame,
            });
            try {
                this._decoder.decode(chunk);
            } catch (e) {
                Log.Warn("Failed to decode:", e);
            }
        }

        // We only keep last frame of each payload
        if (result !== null) {
            result.keep = true;
        }
        return result;
    }
}
/*
 * Decoder for the H.264 VNC encoding.
 *
 * A separate decoding context is kept per rectangle geometry, capped
 * at 64 contexts with least-recently-used eviction.
 */
export default class H264Decoder {
    constructor() {
        this._tick = 0;       // monotonically increasing use counter
        this._contexts = {};  // context id -> H264Context
    }

    // Stable key identifying a rect's decoding context.
    _contextId(x, y, width, height) {
        return `${x},${y},${width},${height}`;
    }

    // Key of the least recently used context, or undefined if none exist.
    _findOldestContextId() {
        let bestKey;
        let bestTick = Number.MAX_VALUE;
        for (const key of Object.keys(this._contexts)) {
            const tick = this._contexts[key].lastUsed;
            if (tick < bestTick) {
                bestTick = tick;
                bestKey = key;
            }
        }
        return bestKey;
    }

    // Register a new context, evicting the LRU one if we are at capacity.
    _createContext(x, y, width, height) {
        const maxContexts = 64;
        if (Object.keys(this._contexts).length >= maxContexts) {
            delete this._contexts[this._findOldestContextId()];
        }
        const context = new H264Context(width, height);
        this._contexts[this._contextId(x, y, width, height)] = context;
        return context;
    }

    // Fetch the context for a rect, creating it on first use.
    _getContext(x, y, width, height) {
        const id = this._contextId(x, y, width, height);
        if (this._contexts[id] === undefined) {
            return this._createContext(x, y, width, height);
        }
        return this._contexts[id];
    }

    _resetContext(x, y, width, height) {
        delete this._contexts[this._contextId(x, y, width, height)];
    }

    _resetAllContexts() {
        this._contexts = {};
    }

    /*
     * Handle one H.264 rect: an 8-byte header (u32 payload length,
     * u32 flags) followed by the payload. Returns false if more data
     * is needed from the socket.
     */
    decodeRect(x, y, width, height, sock, display, depth) {
        const resetContextFlag = 1;
        const resetAllContextsFlag = 2;

        if (sock.rQwait("h264 header", 8)) {
            return false;
        }

        const length = sock.rQshift32();
        const flags = sock.rQshift32();

        if (sock.rQwait("h264 payload", length, 8)) {
            return false;
        }

        // Context resets are requested by the server, e.g. after a
        // stream restart
        if (flags & resetAllContextsFlag) {
            this._resetAllContexts();
        } else if (flags & resetContextFlag) {
            this._resetContext(x, y, width, height);
        }

        const context = this._getContext(x, y, width, height);
        context.lastUsed = this._tick++;

        if (length !== 0) {
            const payload = sock.rQshiftBytes(length, false);
            const frame = context.decode(payload);
            if (frame !== null) {
                display.videoFrame(x, y, width, height, frame);
            }
        }

        return true;
    }
}

View File

@ -380,6 +380,17 @@ export default class Display {
});
}
videoFrame(x, y, width, height, frame) {
this._renderQPush({
'type': 'frame',
'frame': frame,
'x': x,
'y': y,
'width': width,
'height': height
});
}
blitImage(x, y, width, height, arr, offset, fromQueue) {
if (this._renderQ.length !== 0 && !fromQueue) {
// NB(directxman12): it's technically more performant here to use preallocated arrays,
@ -406,9 +417,16 @@ export default class Display {
}
}
drawImage(img, x, y) {
this._drawCtx.drawImage(img, x, y);
this._damage(x, y, img.width, img.height);
// Draw an image (or VideoFrame) and mark the affected area as damaged.
// Accepts the same overloads as CanvasRenderingContext2D.drawImage():
// (img, dx, dy), (img, dx, dy, dw, dh), or the 9-argument source-crop
// form (img, sx, sy, sw, sh, dx, dy, dw, dh).
drawImage(img, ...args) {
this._drawCtx.drawImage(img, ...args);
if (args.length <= 4) {
// Short forms: args start with the destination position; damage is
// based on the image's intrinsic size.
// NOTE(review): for the 5-arg form this ignores dw/dh — confirm
// callers never scale with that overload
const [x, y] = args;
this._damage(x, y, img.width, img.height);
} else {
// 9-arg form: damage the destination rect; callers here pass
// dw == sw and dh == sh (no scaling), so the source size is used
const [,, sw, sh, dx, dy] = args;
this._damage(dx, dy, sw, sh);
}
}
autoscale(containerWidth, containerHeight) {
@ -511,6 +529,35 @@ export default class Display {
ready = false;
}
break;
// Render-queue entry produced by videoFrame(): `a.frame` is the
// pending-frame record from H264Context
case 'frame':
if (a.frame.ready) {
// The encoded frame may be larger than the rect due to
// limitations of the encoder, so we need to crop the
// frame.
let frame = a.frame.frame;
if (frame.codedWidth < a.width || frame.codedHeight < a.height) {
Log.Warn("Decoded video frame does not cover its full rectangle area. Expecting at least " +
a.width + "x" + a.height + " but got " +
frame.codedWidth + "x" + frame.codedHeight);
}
// Copy the top-left width x height pixels of the frame to the
// rect position, unscaled (dw/dh equal sw/sh)
const sx = 0;
const sy = 0;
const sw = a.width;
const sh = a.height;
const dx = a.x;
const dy = a.y;
const dw = sw;
const dh = sh;
this.drawImage(frame, sx, sy, sw, sh, dx, dy, dw, dh);
// Close the VideoFrame to release its backing buffers
frame.close();
} else {
// Not decoded yet: stall the render queue and rescan once the
// decoder resolves this frame's promise
let display = this;
a.frame.promise.then(() => {
display._scanRenderQ();
});
ready = false;
}
break;
}
if (ready) {

View File

@ -16,6 +16,7 @@ export const encodings = {
encodingZRLE: 16,
encodingTightPNG: -260,
encodingJPEG: 21,
encodingH264: 50,
pseudoEncodingQualityLevel9: -23,
pseudoEncodingQualityLevel0: -32,
@ -46,6 +47,7 @@ export function encodingName(num) {
case encodings.encodingZRLE: return "ZRLE";
case encodings.encodingTightPNG: return "TightPNG";
case encodings.encodingJPEG: return "JPEG";
case encodings.encodingH264: return "H.264";
default: return "[unknown encoding " + num + "]";
}
}

View File

@ -10,7 +10,7 @@
import { toUnsigned32bit, toSigned32bit } from './util/int.js';
import * as Log from './util/logging.js';
import { encodeUTF8, decodeUTF8 } from './util/strings.js';
import { dragThreshold } from './util/browser.js';
import { dragThreshold, supportsWebCodecsH264Decode } from './util/browser.js';
import { clientToElement } from './util/element.js';
import { setCapture } from './util/events.js';
import EventTargetMixin from './util/eventtarget.js';
@ -36,6 +36,7 @@ import TightDecoder from "./decoders/tight.js";
import TightPNGDecoder from "./decoders/tightpng.js";
import ZRLEDecoder from "./decoders/zrle.js";
import JPEGDecoder from "./decoders/jpeg.js";
import H264Decoder from "./decoders/h264.js";
// How many seconds to wait for a disconnect to finish
const DISCONNECT_TIMEOUT = 3;
@ -250,6 +251,7 @@ export default class RFB extends EventTargetMixin {
this._decoders[encodings.encodingTightPNG] = new TightPNGDecoder();
this._decoders[encodings.encodingZRLE] = new ZRLEDecoder();
this._decoders[encodings.encodingJPEG] = new JPEGDecoder();
this._decoders[encodings.encodingH264] = new H264Decoder();
// NB: nothing that needs explicit teardown should be done
// before this point, since this can throw an exception
@ -2117,6 +2119,9 @@ export default class RFB extends EventTargetMixin {
encs.push(encodings.encodingCopyRect);
// Only supported with full depth support
if (this._fbDepth == 24) {
if (supportsWebCodecsH264Decode) {
encs.push(encodings.encodingH264);
}
encs.push(encodings.encodingTight);
encs.push(encodings.encodingTightPNG);
encs.push(encodings.encodingZRLE);

View File

@ -70,6 +70,26 @@ try {
}
export const hasScrollbarGutter = _hasScrollbarGutter;
// Whether the browser can decode H.264 via WebCodecs. Filled in
// asynchronously by the probe below; stays false until (and unless)
// the probe succeeds.
export let supportsWebCodecsH264Decode = false;

// Probe WebCodecs for H.264 decode support and update the flag above.
async function _checkWebCodecsH264DecodeSupport() {
    if (!('VideoDecoder' in window)) {
        return;
    }

    // We'll need to make do with some placeholders here
    const config = {
        codec: 'avc1.42401f',
        codedWidth: 1920,
        codedHeight: 1080,
        optimizeForLatency: true,
    };

    try {
        const result = await VideoDecoder.isConfigSupported(config);
        // Coerce explicitly: `supported` is the boolean member of the
        // returned VideoDecoderSupport object
        supportsWebCodecsH264Decode = result.supported === true;
    } catch (e) {
        // isConfigSupported() rejects on invalid configs; treat any
        // failure as "not supported" rather than leaving the promise
        // rejection unhandled
        supportsWebCodecsH264Decode = false;
    }
}
// Fire-and-forget: the flag is read later, when encodings are negotiated
_checkWebCodecsH264DecodeSupport();
/*
* The functions for detection of platforms and browsers below are exported
* but the use of these should be minimized as much as possible.

264
tests/test.h264.js Normal file
View File

@ -0,0 +1,264 @@
import Websock from '../core/websock.js';
import Display from '../core/display.js';
import { H264Parser } from '../core/decoders/h264.js';
import H264Decoder from '../core/decoders/h264.js';
import Base64 from '../core/base64.js';
import FakeWebSocket from './fake.websocket.js';
/* This is a 3 frame 16x16 video where the first frame is solid red, the second
 * is solid green and the third is solid blue.
 *
 * The colour space is BT.709. It is encoded into the stream.
 */
// Annex B H.264 byte stream: it begins with a 00 00 00 01 start code
// followed by an SPS NAL unit (0x67). Regenerate with x264 if the
// fixture ever needs to change.
const redGreenBlue16x16Video = new Uint8Array(Base64.decode(
'AAAAAWdCwBTZnpuAgICgAAADACAAAAZB4oVNAAAAAWjJYyyAAAABBgX//4HcRem95tlIt5Ys' +
'2CDZI+7veDI2NCAtIGNvcmUgMTY0IHIzMTA4IDMxZTE5ZjkgLSBILjI2NC9NUEVHLTQgQVZD' +
'IGNvZGVjIC0gQ29weWxlZnQgMjAwMy0yMDIzIC0gaHR0cDovL3d3dy52aWRlb2xhbi5vcmcv' +
'eDI2NC5odG1sIC0gb3B0aW9uczogY2FiYWM9MCByZWY9NSBkZWJsb2NrPTE6MDowIGFuYWx5' +
'c2U9MHgxOjB4MTExIG1lPWhleCBzdWJtZT04IHBzeT0xIHBzeV9yZD0xLjAwOjAuMDAgbWl4' +
'ZWRfcmVmPTEgbWVfcmFuZ2U9MTYgY2hyb21hX21lPTEgdHJlbGxpcz0yIDh4OGRjdD0wIGNx' +
'bT0wIGRlYWR6b25lPTIxLDExIGZhc3RfcHNraXA9MSBjaHJvbWFfcXBfb2Zmc2V0PS0yIHRo' +
'cmVhZHM9MSBsb29rYWhlYWRfdGhyZWFkcz0xIHNsaWNlZF90aHJlYWRzPTAgbnI9MCBkZWNp' +
'bWF0ZT0xIGludGVybGFjZWQ9MCBibHVyYXlfY29tcGF0PTAgY29uc3RyYWluZWRfaW50cmE9' +
'MCBiZnJhbWVzPTAgd2VpZ2h0cD0wIGtleWludD1pbmZpbml0ZSBrZXlpbnRfbWluPTI1IHNj' +
'ZW5lY3V0PTQwIGludHJhX3JlZnJlc2g9MCByY19sb29rYWhlYWQ9NTAgcmM9YWJyIG1idHJl' +
'ZT0xIGJpdHJhdGU9NDAwIHJhdGV0b2w9MS4wIHFjb21wPTAuNjAgcXBtaW49MCBxcG1heD02' +
'OSBxcHN0ZXA9NCBpcF9yYXRpbz0xLjQwIGFxPTE6MS4wMACAAAABZYiEBrxmKAAPVccAAS04' +
'4AA5DRJMnkycJk4TPwAAAAFBiIga8RigADVVHAAGaGOAANtuAAAAAUGIkBr///wRRQABVf8c' +
'AAcho4AAiD4='));
// Cached result of the WebCodecs H.264 probe (null = not probed yet)
let _haveH264Decode = null;

// Returns whether this browser can decode H.264 via WebCodecs. The
// probe result is cached after the first call.
async function haveH264Decode() {
    if (_haveH264Decode !== null) {
        return _haveH264Decode;
    }

    if (!('VideoDecoder' in window)) {
        _haveH264Decode = false;
        return false;
    }

    // We'll need to make do with some placeholders here
    const config = {
        codec: 'avc1.42401f',
        codedWidth: 1920,
        codedHeight: 1080,
        optimizeForLatency: true,
    };

    // isConfigSupported() resolves to a VideoDecoderSupport object;
    // cache its boolean `supported` member. (Previously the object
    // itself was cached, which is always truthy and made the tests
    // run even on browsers without H.264 support.)
    const result = await VideoDecoder.isConfigSupported(config);
    _haveH264Decode = result.supported === true;
    return _haveH264Decode;
}
// Build a width x height RGBA pixel buffer filled with a single
// colour, given as 32-bit 0xRRGGBBAA.
function createSolidColorFrameBuffer(color, width, height) {
    const pixel = [
        (color >> 24) & 0xff,  // red
        (color >> 16) & 0xff,  // green
        (color >> 8) & 0xff,   // blue
        color & 0xff,          // alpha
    ];

    const buffer = new Uint8ClampedArray(width * height * 4);
    for (let offset = 0; offset < buffer.length; offset += 4) {
        buffer.set(pixel, offset);
    }
    return buffer;
}
// Build the 8-byte H.264 rect header: big-endian u32 payload length
// followed by a big-endian u32 flags word (bit 0 = reset this
// context, bit 1 = reset all contexts).
function makeMessageHeader(length, resetContext, resetAllContexts) {
    const flags = (resetContext ? 1 : 0) | (resetAllContexts ? 2 : 0);

    const header = new Uint8Array(8);
    const view = new DataView(header.buffer);
    view.setUint32(0, length);  // DataView writes big-endian by default
    view.setUint32(4, flags);
    return header;
}
// Prefix the payload with its rect header, returning one flat byte
// array suitable for feeding to the fake websocket.
function wrapRectData(data, resetContext, resetAllContexts) {
    const header = makeMessageHeader(data.length, resetContext, resetAllContexts);
    return [...header, ...data];
}
// Push `data` through the decoder via a fake websocket and flip the
// display. Returns the decoder's "rect complete" result.
function testDecodeRect(decoder, x, y, width, height, data, display, depth) {
    let done = false;

    const sock = new Websock;
    sock.open("ws://example.com");
    sock.on('message', () => {
        done = decoder.decodeRect(x, y, width, height, sock, display, depth);
    });

    if (data.length === 0) {
        // Empty messages are filtered at multiple layers, so we need to
        // do a direct call
        done = decoder.decodeRect(x, y, width, height, sock, display, depth);
    } else {
        sock._websocket._receiveData(new Uint8Array(data));
    }

    display.flip();
    return done;
}
// Fuzzy comparison: true when a and b differ by less than 5, to
// tolerate lossy video encoding and colour-space conversion.
function almost(a, b) {
    return Math.abs(a - b) < 5;
}
// Exercises H264Parser against the fixture: one key frame (carrying
// SPS parameters) followed by two delta frames.
describe('H.264 Parser', function () {
it('should parse constrained baseline video', function () {
let parser = new H264Parser(redGreenBlue16x16Video);
let frame = parser.parse();
// First access unit contains an IDR slice, so it is a key frame
expect(frame).to.have.property('key', true);
// SPS values: constrained baseline profile (66), level 2.0 (20)
expect(parser).to.have.property('profileIdc', 66);
expect(parser).to.have.property('constraintSet', 192);
expect(parser).to.have.property('levelIdc', 20);
frame = parser.parse();
expect(frame).to.have.property('key', false);
frame = parser.parse();
expect(frame).to.have.property('key', false);
// Stream exhausted after the third frame
frame = parser.parse();
expect(frame).to.be.null;
});
});
// Unit tests for H264Decoder's context bookkeeping. Skipped entirely
// when the browser lacks WebCodecs H.264 decode support.
describe('H.264 Decoder Unit Test', function () {
let decoder;
beforeEach(async function () {
if (!await haveH264Decode()) {
this.skip();
return;
}
decoder = new H264Decoder();
});
it('creates and resets context', function () {
let context = decoder._getContext(1, 2, 3, 4);
expect(context._width).to.equal(3);
expect(context._height).to.equal(4);
expect(decoder._contexts).to.not.be.empty;
decoder._resetContext(1, 2, 3, 4);
expect(decoder._contexts).to.be.empty;
});
it('resets all contexts', function () {
decoder._getContext(0, 0, 1, 1);
decoder._getContext(2, 2, 1, 1);
expect(decoder._contexts).to.not.be.empty;
decoder._resetAllContexts();
expect(decoder._contexts).to.be.empty;
});
it('caches contexts', function () {
// Same geometry must reuse the existing context object
let c1 = decoder._getContext(1, 2, 3, 4);
c1.lastUsed = 1;
let c2 = decoder._getContext(1, 2, 3, 4);
c2.lastUsed = 2;
expect(Object.keys(decoder._contexts).length).to.equal(1);
expect(c1.lastUsed).to.equal(c2.lastUsed);
});
it('deletes oldest context', function () {
// 65 distinct geometries exceed the 64-context cap, so the least
// recently used context ('0,0,1,1' with lastUsed == 0) is evicted
for (let i = 0; i < 65; ++i) {
let context = decoder._getContext(i, 0, 1, 1);
context.lastUsed = i;
}
expect(decoder._findOldestContextId()).to.equal('1,0,1,1');
expect(decoder._contexts[decoder._contextId(0, 0, 1, 1)]).to.be.undefined;
expect(decoder._contexts[decoder._contextId(1, 0, 1, 1)]).to.not.be.null;
expect(decoder._contexts[decoder._contextId(63, 0, 1, 1)]).to.not.be.null;
expect(decoder._contexts[decoder._contextId(64, 0, 1, 1)]).to.not.be.null;
});
});
// End-to-end tests: decode the fixture through a fake websocket and
// check the displayed pixels. Skipped when WebCodecs H.264 decode is
// unavailable.
describe('H.264 Decoder Functional Test', function () {
let decoder;
let display;
before(FakeWebSocket.replace);
after(FakeWebSocket.restore);
beforeEach(async function () {
if (!await haveH264Decode()) {
this.skip();
return;
}
decoder = new H264Decoder();
display = new Display(document.createElement('canvas'));
display.resize(16, 16);
});
it('should handle H.264 rect', async function () {
let data = wrapRectData(redGreenBlue16x16Video, false, false);
let done = testDecodeRect(decoder, 0, 0, 16, 16, data, display, 24);
expect(done).to.be.true;
await display.flush();
// Only the last frame of a payload is kept, and the fixture's last
// frame is solid blue (0x0000ffff as RGBA)
let targetData = createSolidColorFrameBuffer(0x0000ffff, 16, 16);
expect(display).to.have.displayed(targetData, almost);
});
it('should handle specific context reset', async function () {
let data = wrapRectData(redGreenBlue16x16Video, false, false);
let done = testDecodeRect(decoder, 0, 0, 16, 16, data, display, 24);
expect(done).to.be.true;
await display.flush();
let targetData = createSolidColorFrameBuffer(0x0000ffff, 16, 16);
expect(display).to.have.displayed(targetData, almost);
// An empty payload with the reset-context flag must leave a fresh,
// unconfigured context for this rect
data = wrapRectData([], true, false);
done = testDecodeRect(decoder, 0, 0, 16, 16, data, display, 24);
expect(done).to.be.true;
await display.flush();
expect(decoder._contexts[decoder._contextId(0, 0, 16, 16)]._decoder).to.be.null;
});
it('should handle global context reset', async function () {
let data = wrapRectData(redGreenBlue16x16Video, false, false);
let done = testDecodeRect(decoder, 0, 0, 16, 16, data, display, 24);
expect(done).to.be.true;
await display.flush();
let targetData = createSolidColorFrameBuffer(0x0000ffff, 16, 16);
expect(display).to.have.displayed(targetData, almost);
// The reset-all flag must likewise leave a fresh context behind
data = wrapRectData([], false, true);
done = testDecodeRect(decoder, 0, 0, 16, 16, data, display, 24);
expect(done).to.be.true;
await display.flush();
expect(decoder._contexts[decoder._contextId(0, 0, 16, 16)]._decoder).to.be.null;
});
});