audio: implement re-sync, move code to separate file
Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
This commit is contained in:
parent
6f8f4bff62
commit
3721da90dc
|
@ -0,0 +1,170 @@
|
||||||
|
// The RFB protocol (VNC) is designed for real-time user interactions
|
||||||
|
// and allows transferring audio messages together with screen content.
|
||||||
|
// It is not possible to use any kind of buffering, because that would
|
||||||
|
// introduce large delays between user interaction and content display.
|
||||||
|
//
|
||||||
|
// This is not really a problem with screen content, because the human
|
||||||
|
// brain is quite tolerant of slight speed changes in video content,
|
||||||
|
// and we mostly transfer non-video data anyways.
|
||||||
|
//
|
||||||
|
// With audio, the situation is quite different, as it must be played
|
||||||
|
// at a constant speed. Any delay leads to audio distortion, which is
|
||||||
|
// unpleasant for humans.
|
||||||
|
//
|
||||||
|
// Without buffering, it is always possible for audio frames to arrive
|
||||||
|
// too late or too early due to changing network speeds.
|
||||||
|
//
|
||||||
|
// We use the following algorithm:
|
||||||
|
//
|
||||||
|
// - small Jitter buffer to tolerate small speed changes (20ms)
|
||||||
|
// - simply discard late audio frames
|
||||||
|
// - Queue early frames with slight speedup (pitch scale) to re-sync audio
|
||||||
|
// - if we get too many early frames, skip frames for fast re-sync
|
||||||
|
//
|
||||||
|
// ## Audio format
|
||||||
|
//
|
||||||
|
// We use/expect U16 raw audio data.
|
||||||
|
|
||||||
|
import * as Log from './util/logging.js';
|
||||||
|
|
||||||
|
export default class Audio {
    // Create a player for raw, interleaved U16 (little endian) audio frames.
    //
    // sample_rate: sample rate in Hz, also used for the AudioContext
    // nchannels:   number of interleaved channels per sample frame
    //
    // No AudioContext is created here; call start() to begin playback.
    constructor(sample_rate, nchannels) {
        this._context = null;

        // Size of the jitter buffer in seconds. The first frame is
        // scheduled this far in the future.
        this._jitter = 0.02;

        // Frames scheduled more than this many seconds ahead are dropped
        // instead of queued (fast re-sync).
        this._max_early = 0.5;

        // Speed-up applied to queued frames while re-syncing (1% faster).
        this._resync_factor = 1.01;

        // Seconds of audio that must be scheduled after a fast re-sync
        // before frames are actually sent to the output again.
        this._min_stable_time = 1.0;

        // Frames with this many samples or fewer are never resampled.
        this._min_resample_frames = 100;

        this._sample_rate = sample_rate;
        this._nchannels = nchannels;

        this._resetState();
    }

    // ===== PROPERTIES =====

    get sample_rate() { return this._sample_rate; }
    get nchannels() { return this._nchannels; }

    // ===== PUBLIC METHODS =====

    // Stop audio playback
    //
    // Further audio frames are simply dropped. The AudioContext is closed,
    // because browsers only allow a limited number of open contexts and
    // every start() creates a new one.
    stop() {
        const context = this._context;
        this._context = null;
        this._resetState();

        if (context !== null && context.state !== "closed") {
            context.close().catch((err) => {
                Log.Warn("Failed to close audio context: " + err);
            });
        }
    }

    // Start audio playback with a fresh AudioContext
    //
    // A context left over from a previous start() is closed first, so
    // calling start() repeatedly does not leak contexts.
    start() {
        if (this._context !== null) {
            this.stop();
        }

        this._context = new AudioContext({
            latencyHint: "interactive",
            sampleRate: this._sample_rate,
        });
        this._resetState();
    }

    // Schedule one frame of raw U16 audio data for playback
    //
    // payload: array of bytes (indexable, with a length), interleaved
    //          little endian U16 samples
    //
    // Always returns true. The frame counts as consumed even when it is
    // dropped (playback stopped, frame too late, or frame too early).
    play(payload) {
        if (this._context === null) {
            return true;
        }

        const sample_bytes = 2*this._nchannels;

        if (payload.length < sample_bytes) {
            // Less than one sample frame: nothing to play, and
            // createBuffer() throws for a zero length buffer.
            return true;
        }

        const ctime = this._context.currentTime;

        // Positive: we are ahead of the playback clock (frame is early).
        // Negative: the frame should already have started (frame is late).
        const time_offset = this._next_start - ctime;

        // Hysteresis: start resampling at 5*jitter, stop again at jitter.
        const resample_start = 5*this._jitter;

        if ((time_offset < this._jitter) && (this._resample_trigger !== resample_start)) {
            Log.Debug("Stop resampling because audio is in sync (delay = " + time_offset + " sec)");
            this._resample_trigger = resample_start;
        }

        let buffer = null;
        if (time_offset > this._resample_trigger &&
            (payload.length > (this._min_resample_frames*sample_bytes))) {
            if (this._resample_trigger !== this._jitter) {
                Log.Debug("Start resampling to re-sync audio (delay = " + time_offset + " sec)");
                this._resample_trigger = this._jitter;
            }
            buffer = this._pitchScale(payload, this._resync_factor);
        } else {
            buffer = this._createBuffer(payload);
        }

        // _next_start === 0 means "nothing scheduled yet", in which case
        // time_offset is meaningless and the checks below are skipped.
        if (this._next_start > 0) {
            if (time_offset < -buffer.duration) {
                Log.Warn("Skip delayed audio frame (delay = " + (-time_offset) + " sec)");
                this._next_start = ctime + this._jitter;
                return true; // do not play delayed frame - skip it!
            }
            if (time_offset > this._max_early) {
                Log.Warn("Move fast audio frame (offset = " + time_offset + " sec)");
                // Mute output until the stream has been stable again.
                this._stable_time = 0;
                return true; // skip frame.
            }
        }

        this._stable_time += buffer.duration;

        if (this._next_start === 0) {
            this._next_start = ctime + this._jitter;
        }

        const start_time = this._next_start;
        this._next_start += buffer.duration;

        // The timeline advances even while muted, so playback resumes in
        // sync once the stream has been stable for long enough.
        if (this._stable_time >= this._min_stable_time) {
            const source = this._context.createBufferSource();
            source.buffer = buffer;
            source.connect(this._context.destination);
            source.start(start_time);
        }

        return true;
    }

    // ===== PRIVATE METHODS =====

    // Reset the scheduling and re-sync state to its initial values, so a
    // restarted stream does not inherit a muted or resampling state.
    _resetState() {
        this._next_start = 0;
        this._resample_trigger = 5*this._jitter;
        this._stable_time = this._min_stable_time;
    }

    // Convert a payload into an AudioBuffer that is shorter by 'factor',
    // using linear interpolation between neighbouring samples. Played at
    // the normal rate this speeds the audio up (and raises the pitch).
    //
    // see: https://en.wikipedia.org/wiki/Audio_time_stretching_and_pitch_scaling
    _pitchScale(payload, factor) {
        const sample_bytes = 2*this._nchannels;
        const new_length = Math.ceil(payload.length/(factor*sample_bytes));

        const buffer = this._context.createBuffer(this._nchannels, new_length, this._sample_rate);

        for (let ch = 0; ch < this._nchannels; ch++) {
            const channel = buffer.getChannelData(ch);
            const channel_offset = ch*2;

            for (let i = 0; i < buffer.length; i++) {
                // Position of output sample i in the input stream.
                const pos_float = i*factor;
                const j = Math.trunc(pos_float);
                const second_weight = pos_float % 1;
                const first_weight = 1 - second_weight;

                let p = j*sample_bytes + channel_offset;
                const value0 = payload[p] + payload[p+1]*256;

                // Use the following sample for interpolation, or repeat
                // the last one at the end of the payload.
                p += sample_bytes;
                let value1 = value0;
                if (p < payload.length) {
                    value1 = payload[p] + payload[p+1]*256;
                }

                const value = (value0*first_weight + value1*second_weight);
                // Map unsigned 0..65535 to -1.0..1.0
                channel[i] = (value / 32768.0) - 1.0;
            }
        }

        return buffer;
    }

    // Convert a payload into an AudioBuffer without resampling.
    //
    // Trailing bytes that do not form a complete sample frame are ignored.
    _createBuffer(payload) {
        const sample_bytes = 2*this._nchannels;
        const frames = Math.floor(payload.length/sample_bytes);

        const buffer = this._context.createBuffer(this._nchannels, frames, this._sample_rate);

        for (let ch = 0; ch < this._nchannels; ch++) {
            const channel = buffer.getChannelData(ch);
            const channel_offset = ch*2;

            for (let i = 0; i < buffer.length; i++) {
                const p = i*sample_bytes + channel_offset;
                const value = payload[p] + payload[p+1]*256;
                // Map unsigned 0..65535 to -1.0..1.0
                channel[i] = (value / 32768.0) - 1.0;
            }
        }

        return buffer;
    }
}
|
67
core/rfb.js
67
core/rfb.js
|
@ -14,6 +14,7 @@ import { dragThreshold, supportsWebCodecsH264Decode } from './util/browser.js';
|
||||||
import { clientToElement } from './util/element.js';
|
import { clientToElement } from './util/element.js';
|
||||||
import { setCapture } from './util/events.js';
|
import { setCapture } from './util/events.js';
|
||||||
import EventTargetMixin from './util/eventtarget.js';
|
import EventTargetMixin from './util/eventtarget.js';
|
||||||
|
import Audio from "./audio.js";
|
||||||
import Display from "./display.js";
|
import Display from "./display.js";
|
||||||
import Inflator from "./inflator.js";
|
import Inflator from "./inflator.js";
|
||||||
import Deflator from "./deflator.js";
|
import Deflator from "./deflator.js";
|
||||||
|
@ -157,10 +158,7 @@ export default class RFB extends EventTargetMixin {
|
||||||
this._qemuAudioSupported = false;
|
this._qemuAudioSupported = false;
|
||||||
this._page_had_user_interaction = false;
|
this._page_had_user_interaction = false;
|
||||||
this._audio_enable = false;
|
this._audio_enable = false;
|
||||||
this._audio_next_start = 0;
|
this._audio = new Audio(44100, 2);
|
||||||
this._audio_sample_rate = 44100;
|
|
||||||
this._audio_channels = 2;
|
|
||||||
this._audio_context = null;
|
|
||||||
|
|
||||||
this._extendedPointerEventSupported = false;
|
this._extendedPointerEventSupported = false;
|
||||||
|
|
||||||
|
@ -2697,7 +2695,7 @@ export default class RFB extends EventTargetMixin {
|
||||||
|
|
||||||
case encodings.pseudoEncodingQEMUAudioEvent:
|
case encodings.pseudoEncodingQEMUAudioEvent:
|
||||||
if (!this._qemuAudioSupported) {
|
if (!this._qemuAudioSupported) {
|
||||||
RFB.messages.enableQemuAudioUpdates(this._sock, this._audio_channels, this._audio_sample_rate);
|
RFB.messages.enableQemuAudioUpdates(this._sock, this._audio.nchannels, this._audio.sample_rate);
|
||||||
this._qemuAudioSupported = true;
|
this._qemuAudioSupported = true;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
@ -2739,16 +2737,11 @@ export default class RFB extends EventTargetMixin {
|
||||||
|
|
||||||
switch (operation) {
|
switch (operation) {
|
||||||
case 0: {
|
case 0: {
|
||||||
this._audio_context = null;
|
this._audio.stop();
|
||||||
this._audio_next_start = 0;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
case 1: {
|
case 1: {
|
||||||
this._audio_context = new AudioContext({
|
this._audio.start();
|
||||||
latencyHint: "interactive",
|
|
||||||
sampleRate: this._audio_sample_rate,
|
|
||||||
});
|
|
||||||
this._audio_next_start = 0;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
case 2: break;
|
case 2: break;
|
||||||
|
@ -2764,47 +2757,21 @@ export default class RFB extends EventTargetMixin {
|
||||||
|
|
||||||
const length = this._sock.rQshift32();
|
const length = this._sock.rQshift32();
|
||||||
|
|
||||||
|
if (length === 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (this._sock.rQwait("audio payload", length, 8)) {
|
if (this._sock.rQwait("audio payload", length, 8)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (length !== 0) {
|
let payload = this._sock.rQshiftBytes(length, false);
|
||||||
let payload = this._sock.rQshiftBytes(length, false);
|
|
||||||
|
|
||||||
if (this._audio_context === null) {
|
if (!this._page_had_user_interaction || !this._audio_enable) {
|
||||||
return false;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
let sample_bytes = 2*this._audio_channels;
|
|
||||||
let buffer = this._audio_context.createBuffer(this._audio_channels, length/sample_bytes, this._audio_sample_rate);
|
|
||||||
|
|
||||||
for (let ch = 0; ch < this._audio_channels; ch++) {
|
|
||||||
const channel = buffer.getChannelData(ch);
|
|
||||||
let channel_offset = ch*2;
|
|
||||||
for (let i = 0; i < buffer.length; i++) {
|
|
||||||
let p = i*sample_bytes + channel_offset;
|
|
||||||
let value = payload[p] + payload[p+1]*256;
|
|
||||||
channel[i] = (value / 32768.0) - 1.0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this._page_had_user_interaction && this._audio_enable) {
|
|
||||||
let ctime = this._audio_context.currentTime;
|
|
||||||
if (ctime > this._audio_next_start) {
|
|
||||||
this._audio_next_start = ctime;
|
|
||||||
}
|
|
||||||
let start_time = this._audio_next_start;
|
|
||||||
|
|
||||||
this._audio_next_start += buffer.duration;
|
|
||||||
|
|
||||||
let source = this._audio_context.createBufferSource();
|
|
||||||
source.buffer = buffer;
|
|
||||||
source.connect(this._audio_context.destination);
|
|
||||||
source.start(start_time);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return this._audio.play(payload);
|
||||||
}
|
}
|
||||||
|
|
||||||
enable_audio(value) {
|
enable_audio(value) {
|
||||||
|
@ -2812,7 +2779,7 @@ export default class RFB extends EventTargetMixin {
|
||||||
this._audio_enable = value;
|
this._audio_enable = value;
|
||||||
if (this._qemuAudioSupported) {
|
if (this._qemuAudioSupported) {
|
||||||
if (this._audio_enable) {
|
if (this._audio_enable) {
|
||||||
RFB.messages.enableQemuAudioUpdates(this._sock, this._audio_channels, this._audio_sample_rate);
|
RFB.messages.enableQemuAudioUpdates(this._sock, this._audio.nchannels, this._audio.sample_rate);
|
||||||
} else {
|
} else {
|
||||||
RFB.messages.disableQemuAudioUpdates(this._sock);
|
RFB.messages.disableQemuAudioUpdates(this._sock);
|
||||||
}
|
}
|
||||||
|
@ -3433,7 +3400,7 @@ RFB.messages = {
|
||||||
sock.flush();
|
sock.flush();
|
||||||
},
|
},
|
||||||
|
|
||||||
disableQemuAudioUpdates(sock, channels, sample_rate) {
|
disableQemuAudioUpdates(sock, nchannels, sample_rate) {
|
||||||
sock.sQpush8(255); // msg-type
|
sock.sQpush8(255); // msg-type
|
||||||
sock.sQpush8(1); // submessage-type
|
sock.sQpush8(1); // submessage-type
|
||||||
sock.sQpush16(1); // disable audio
|
sock.sQpush16(1); // disable audio
|
||||||
|
@ -3441,13 +3408,13 @@ RFB.messages = {
|
||||||
sock.flush();
|
sock.flush();
|
||||||
},
|
},
|
||||||
|
|
||||||
enableQemuAudioUpdates(sock, channels, sample_rate) {
|
enableQemuAudioUpdates(sock, nchannels, sample_rate) {
|
||||||
|
|
||||||
sock.sQpush8(255); // msg-type
|
sock.sQpush8(255); // msg-type
|
||||||
sock.sQpush8(1); // submessage-type
|
sock.sQpush8(1); // submessage-type
|
||||||
sock.sQpush16(2); // set sample format
|
sock.sQpush16(2); // set sample format
|
||||||
sock.sQpush8(2); // format U16
|
sock.sQpush8(2); // format U16
|
||||||
sock.sQpush8(channels);
|
sock.sQpush8(nchannels);
|
||||||
sock.sQpush32(sample_rate); // audio frequency
|
sock.sQpush32(sample_rate); // audio frequency
|
||||||
|
|
||||||
sock.sQpush8(255); // msg-type
|
sock.sQpush8(255); // msg-type
|
||||||
|
|
Loading…
Reference in New Issue