ScriptProcessorNode
一、认识
在 JavaScript 中
计算录制音频的音量、获取音频样本数据,通常需要使用 AudioContext API
和 ScriptProcessorNode
或者现代的 AudioWorkletNode
。本文采用 ScriptProcessorNode
精确、实时地计算音频音量。请注意,由于 ScriptProcessorNode
已经被标记为过时,并且可能会在未来被移除,因此建议使用更现代的 AudioWorkletNode
。主要逻辑如下:
-
获取用户音频流: 通过
navigator.mediaDevices.getUserMedia({ audio: true })
请求麦克风权限并获取音频流。
-
创建音频上下文: 实例化一个新的
AudioContext
上下文, 用于处理和控制音频操作, 提供了一个用于音频处理和合成的工作环境。 -
创建音源节点(音频处理的输入源): 通过
audioContext.createMediaStreamSource(stream)
创建一个接口, 该接口可以从传入的媒体流(MediaStream
)对象中抽取音频数据作为音频上下文(AudioContext
)里的一个音源节点。 -
创建音频处理流: 通过
audioContext.createScriptProcessor(bufferSize, numberOfInputChannels, numberOfOutputChannels)
创建音频处理流, 可以通过JavaScript
直接处理音频流 -
监听音频处理流的
audioprocess
事件: 可以通过监听该节点的audioprocess
事件,来对进来的音频流进行实时的处理, 在该事件的处理函数中获取输入缓冲区的音频数据,执行一些操作,然后将结果写入输出缓冲区。具体操作如后续步骤所示。 -
取出第一个信道的样本数组: 通过 getChannelData(0) 取出第一个信道的样本数组, 通常是左声道的音频数据。
-
将声道音频样本重新采样到
16000Hz(16kHz)
的采样率: 使用了线性插值法来生成新的采样点。计算出新数据点的位置,然后在原始采样点之间进行插值,以获得新采样点的值。为什么要重新采样: 很多音频处理、识别或传输场景,特别是在语音识别技术中,需要特定的采样率(如16kHz
),因为它可以覆盖讲话的频率范围并且数据量适中。浏览器采集的原始音频采样率一般为 44.1kHz 或 48kHz
, 数据量也会相应地很大。通过将音频重采样到16kHz
,可以显著减少数据的大小,这在数据传输、存储空间以及处理速度方面都是有益的。较低的采样率意味着处理时的计算量较小, 处理更高采样率的音频会增加算法的延迟和功耗。重采样到16kHz
的操作是为了平衡音频质量、数据大小和计算效率,在确保足够音质的前提下,降低数据处理和存储成本,同时适应特定的应用场景需求。 -
将声道音频样本浮点数组转换为
16
位PCM
格式: 创建一个用于存储16
位PCM
数据的ArrayBuffer
, 并将浮点数值转换为16
位有符号整型值(在-32768
到32767
的范围内),按照Little-endian
格式存储。为什么要使用16
位PCM
格式: 因为它是许多音频和语音处理系统中的一个标准格式。
二、实现
2.1 index.js
// Audio-only capture constraints passed to getUserMedia.
const mediaConstraints = { audio: true };
// Wait for the browser to hand over the captured media stream.
const stream = await navigator.mediaDevices.getUserMedia(mediaConstraints);
// Audio graph context shared by the rest of this module.
const audioContext = new AudioContext();
// Source node that feeds the captured stream into the audio graph.
const source = audioContext.createMediaStreamSource(stream);
/**
 * Resamples a block of audio samples to 16 kHz using linear interpolation.
 *
 * The first and last output samples are pinned to the first and last input
 * samples; every sample in between is interpolated between its two nearest
 * input neighbours.
 *
 * @param {ArrayLike<number>} audioData - Input samples (copied into a Float32Array).
 * @param {number} [sampleRate=44100] - Sample rate of `audioData` in Hz.
 * @returns {Float32Array} The resampled samples; empty when the input is empty.
 * @throws {RangeError} If `sampleRate` is not a positive finite number.
 */
function to16kHz(audioData, sampleRate = 44100) {
  const TARGET_SAMPLE_RATE = 16000;
  const rate = Number(sampleRate);
  if (!Number.isFinite(rate) || rate <= 0) {
    throw new RangeError(
      `sampleRate must be a positive finite number, received ${sampleRate}`
    );
  }

  const data = new Float32Array(audioData);
  const fitCount = Math.round(data.length * (TARGET_SAMPLE_RATE / rate));
  const newData = new Float32Array(fitCount);
  if (fitCount === 0) {
    return newData;
  }

  const lastSourceIndex = data.length - 1;
  const lastTargetIndex = fitCount - 1;
  if (lastTargetIndex === 0) {
    // A single output sample has no interpolation step (it would divide by
    // zero); keep the established result of returning the final input sample.
    newData[0] = data[lastSourceIndex];
    return newData;
  }

  // Distance, in input samples, between two consecutive output samples.
  const springFactor = lastSourceIndex / lastTargetIndex;
  newData[0] = data[0];
  for (let i = 1; i < lastTargetIndex; i++) {
    const position = i * springFactor;
    // Keep the indices numeric; no string round-trip is needed.
    const before = Math.floor(position);
    const after = Math.ceil(position);
    const atPoint = position - before;
    newData[i] = data[before] + (data[after] - data[before]) * atPoint;
  }
  newData[lastTargetIndex] = data[lastSourceIndex];
  return newData;
}
/**
 * Converts floating-point samples into 16-bit signed PCM.
 *
 * Each sample is clamped to [-1, 1], scaled to the signed 16-bit range and
 * written in little-endian byte order.
 *
 * @param {ArrayLike<number>} input - Floating-point samples.
 * @returns {DataView} View over a new buffer holding two bytes per sample.
 */
function to16BitPCM(input) {
  const BYTES_PER_SAMPLE = 2;
  const sampleCount = input.length;
  const view = new DataView(new ArrayBuffer(sampleCount * BYTES_PER_SAMPLE));

  for (let index = 0; index < sampleCount; index += 1) {
    const clamped = Math.max(-1, Math.min(1, input[index]));
    // Negative values scale by 0x8000 and positive ones by 0x7fff so both
    // ends of the int16 range are reachable.
    let scaled;
    if (clamped < 0) {
      scaled = clamped * 0x8000;
    } else {
      scaled = clamped * 0x7fff;
    }
    view.setInt16(index * BYTES_PER_SAMPLE, scaled, true);
  }

  return view;
}
/**
 * Wires the microphone source into a ScriptProcessorNode and logs every
 * processed chunk as 16 kHz, 16-bit PCM.
 *
 * Uses the module-level `audioContext` and `source`. The processor is also
 * connected to the context destination, as in the original wiring.
 *
 * @returns {Promise<void>} Resolves once the audio graph has been wired up.
 */
async function audioRecorder() {
  const scriptProcessor = audioContext.createScriptProcessor(1024, 1, 1);
  source.connect(scriptProcessor);
  scriptProcessor.connect(audioContext.destination);
  scriptProcessor.onaudioprocess = (event) => {
    const { inputBuffer } = event;
    // First channel only.
    const samples = inputBuffer.getChannelData(0);
    // Resample from the buffer's real rate; relying on the 44100 default
    // distorts the result when the context runs at another rate.
    const output = to16kHz(samples, inputBuffer.sampleRate);
    const audioBuffer = to16BitPCM(output);
    console.log("audioBuffer: ", audioBuffer);
  };
}
audioRecorder();
2.2 index.html
<!DOCTYPE html>
<!-- The document language is declared as Chinese to match the page title. -->
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>音频采样</title>
</head>
<body>
<!-- Loaded as a module because index.js uses top-level await. -->
<script type="module" src="./index.js"></script>
</body>
</html>
三、兼容
3.1 processor.js
/**
 * Reports whether the AudioWorklet API can be used with the given context.
 *
 * Always returns a strict boolean, and a nullish context yields `false`
 * instead of throwing.
 *
 * @param {object} audioContext - The audio context to inspect.
 * @returns {boolean} `true` when `audioContext.audioWorklet.addModule` is a
 *   function and a global `AudioWorkletNode` is defined.
 */
function isSupportAudioWorklet(audioContext) {
  const canLoadModules =
    typeof audioContext?.audioWorklet?.addModule === 'function';
  const hasWorkletNode = typeof AudioWorkletNode !== 'undefined';
  return canLoadModules && hasWorkletNode;
}
/**
 * Reports whether the given context exposes `createScriptProcessor`.
 *
 * @param {object} audioContext - The audio context to inspect.
 * @returns {boolean} `true` when `createScriptProcessor` is a function.
 */
function isSupportCreateScriptProcessor(audioContext) {
  const { createScriptProcessor } = audioContext;
  return typeof createScriptProcessor === 'function';
}
/**
 * Resamples a block of audio samples to 16 kHz using linear interpolation.
 *
 * The first and last output samples are pinned to the first and last input
 * samples; every sample in between is interpolated between its two nearest
 * input neighbours.
 *
 * @param {ArrayLike<number>} audioData - Input samples (copied into a Float32Array).
 * @param {number} [sampleRate=44100] - Sample rate of `audioData` in Hz.
 * @returns {Float32Array} The resampled samples; empty when the input is empty.
 * @throws {RangeError} If `sampleRate` is not a positive finite number.
 */
function to16kHz(audioData, sampleRate = 44100) {
  const TARGET_SAMPLE_RATE = 16000;
  const rate = Number(sampleRate);
  if (!Number.isFinite(rate) || rate <= 0) {
    throw new RangeError(
      `sampleRate must be a positive finite number, received ${sampleRate}`
    );
  }

  const data = new Float32Array(audioData);
  const fitCount = Math.round(data.length * (TARGET_SAMPLE_RATE / rate));
  const newData = new Float32Array(fitCount);
  if (fitCount === 0) {
    return newData;
  }

  const lastSourceIndex = data.length - 1;
  const lastTargetIndex = fitCount - 1;
  if (lastTargetIndex === 0) {
    // A single output sample has no interpolation step (it would divide by
    // zero); keep the established result of returning the final input sample.
    newData[0] = data[lastSourceIndex];
    return newData;
  }

  // Distance, in input samples, between two consecutive output samples.
  const springFactor = lastSourceIndex / lastTargetIndex;
  newData[0] = data[0];
  for (let i = 1; i < lastTargetIndex; i++) {
    const position = i * springFactor;
    // Keep the indices numeric; no string round-trip is needed.
    const before = Math.floor(position);
    const after = Math.ceil(position);
    const atPoint = position - before;
    newData[i] = data[before] + (data[after] - data[before]) * atPoint;
  }
  newData[lastTargetIndex] = data[lastSourceIndex];
  return newData;
}
/**
 * Converts floating-point samples into 16-bit signed PCM.
 *
 * Each sample is clamped to [-1, 1], scaled to the signed 16-bit range and
 * written in little-endian byte order.
 *
 * @param {ArrayLike<number>} input - Floating-point samples.
 * @returns {DataView} View over a new buffer holding two bytes per sample.
 */
function to16BitPCM(input) {
  const BYTES_PER_SAMPLE = 2;
  const sampleCount = input.length;
  const view = new DataView(new ArrayBuffer(sampleCount * BYTES_PER_SAMPLE));

  for (let index = 0; index < sampleCount; index += 1) {
    const clamped = Math.max(-1, Math.min(1, input[index]));
    // Negative values scale by 0x8000 and positive ones by 0x7fff so both
    // ends of the int16 range are reachable.
    let scaled;
    if (clamped < 0) {
      scaled = clamped * 0x8000;
    } else {
      scaled = clamped * 0x7fff;
    }
    view.setInt16(index * BYTES_PER_SAMPLE, scaled, true);
  }

  return view;
}
/**
 * Captures audio from a MediaStream and delivers it to a callback as
 * 16 kHz, 16-bit PCM chunks.
 *
 * Uses an AudioWorkletNode when `isSupportAudioWorklet` reports support and
 * falls back to a ScriptProcessorNode otherwise.
 */
export default class Processor {
  /**
   * @param {object} options
   * @param {MediaStream} options.stream - Stream to read audio from.
   * @param {(data: { buffer: DataView }) => void} [options.processRecord] -
   *   Called with every converted chunk.
   * @param {string} [options.workletUrl] - URL of the worklet module that
   *   registers `custom-processor`; defaults to the original development URL.
   */
  constructor(options) {
    const { stream } = options;
    this.options = options;
    this.audioContext = new AudioContext();
    this.mediaStreamSource = this.audioContext.createMediaStreamSource(stream);
    this.init();
  }

  /** Picks the processing path supported by the current audio context. */
  init() {
    if (isSupportAudioWorklet(this.audioContext)) {
      // The worklet setup catches and logs its own errors, so the promise is
      // intentionally not awaited here.
      void this.audioWorkletNodeDealAudioData();
    } else {
      this.scriptNodeDealAudioData();
    }
  }

  /**
   * Processes audio through a ScriptProcessorNode. Does nothing when the
   * context has no `createScriptProcessor`; setup errors are logged.
   */
  scriptNodeDealAudioData() {
    if (!isSupportCreateScriptProcessor(this.audioContext)) {
      return;
    }
    try {
      const scriptProcessor = this.audioContext.createScriptProcessor(
        1024,
        1,
        1
      );
      this.mediaStreamSource.connect(scriptProcessor);
      scriptProcessor.connect(this.audioContext.destination);
      scriptProcessor.onaudioprocess = (event) => {
        const { inputBuffer } = event;
        // First channel only.
        const samples = inputBuffer.getChannelData(0);
        // Resample from the buffer's real rate; relying on the 44100 default
        // distorts the result when the context runs at another rate.
        const output = to16kHz(samples, inputBuffer.sampleRate);
        const audioBuffer = to16BitPCM(output);
        const data = {
          buffer: audioBuffer
        };
        this.options.processRecord?.(data);
      };
    } catch (e) {
      console.log('scriptNodeDealAudioData 错误原因:', e);
    }
  }

  /**
   * Processes audio through the `custom-processor` AudioWorklet and forwards
   * each message it posts to `options.processRecord`. Does nothing when
   * AudioWorklet is unsupported; setup errors are logged.
   *
   * @returns {Promise<void>}
   */
  async audioWorkletNodeDealAudioData() {
    if (!isSupportAudioWorklet(this.audioContext)) {
      return;
    }
    // Allow callers to point at their own deployment of the worklet module;
    // the default keeps the previously hard-coded location.
    const {
      workletUrl = 'http://127.0.0.1:5502/test/javascript/audioRecord/022301/processor/custom-processor.js'
    } = this.options;
    try {
      await this.audioContext.audioWorklet.addModule(workletUrl);
      const customNode = new AudioWorkletNode(
        this.audioContext,
        'custom-processor'
      );
      this.mediaStreamSource
        .connect(customNode)
        .connect(this.audioContext.destination);
      customNode.port.onmessage = (event) => {
        const { audioBuffer } = event.data;
        const data = {
          buffer: audioBuffer
        };
        this.options.processRecord?.(data);
      };
    } catch (e) {
      console.log('audioWorkletNodeDealAudioData 错误原因:', e);
    }
  }
}
3.2 custom-processor.js
/**
 * Resamples a block of audio samples to 16 kHz using linear interpolation.
 *
 * The first and last output samples are pinned to the first and last input
 * samples; every sample in between is interpolated between its two nearest
 * input neighbours.
 *
 * @param {ArrayLike<number>} audioData - Input samples (copied into a Float32Array).
 * @param {number} [sampleRate=44100] - Sample rate of `audioData` in Hz.
 * @returns {Float32Array} The resampled samples; empty when the input is empty.
 * @throws {RangeError} If `sampleRate` is not a positive finite number.
 */
function to16kHz(audioData, sampleRate = 44100) {
  const TARGET_SAMPLE_RATE = 16000;
  const rate = Number(sampleRate);
  if (!Number.isFinite(rate) || rate <= 0) {
    throw new RangeError(
      `sampleRate must be a positive finite number, received ${sampleRate}`
    );
  }

  const data = new Float32Array(audioData);
  const fitCount = Math.round(data.length * (TARGET_SAMPLE_RATE / rate));
  const newData = new Float32Array(fitCount);
  if (fitCount === 0) {
    return newData;
  }

  const lastSourceIndex = data.length - 1;
  const lastTargetIndex = fitCount - 1;
  if (lastTargetIndex === 0) {
    // A single output sample has no interpolation step (it would divide by
    // zero); keep the established result of returning the final input sample.
    newData[0] = data[lastSourceIndex];
    return newData;
  }

  // Distance, in input samples, between two consecutive output samples.
  const springFactor = lastSourceIndex / lastTargetIndex;
  newData[0] = data[0];
  for (let i = 1; i < lastTargetIndex; i++) {
    const position = i * springFactor;
    // Keep the indices numeric; no string round-trip is needed.
    const before = Math.floor(position);
    const after = Math.ceil(position);
    const atPoint = position - before;
    newData[i] = data[before] + (data[after] - data[before]) * atPoint;
  }
  newData[lastTargetIndex] = data[lastSourceIndex];
  return newData;
}
/**
 * Converts floating-point samples into 16-bit signed PCM.
 *
 * Each sample is clamped to [-1, 1], scaled to the signed 16-bit range and
 * written in little-endian byte order.
 *
 * @param {ArrayLike<number>} input - Floating-point samples.
 * @returns {DataView} View over a new buffer holding two bytes per sample.
 */
function to16BitPCM(input) {
  const BYTES_PER_SAMPLE = 2;
  const sampleCount = input.length;
  const view = new DataView(new ArrayBuffer(sampleCount * BYTES_PER_SAMPLE));

  for (let index = 0; index < sampleCount; index += 1) {
    const clamped = Math.max(-1, Math.min(1, input[index]));
    // Negative values scale by 0x8000 and positive ones by 0x7fff so both
    // ends of the int16 range are reachable.
    let scaled;
    if (clamped < 0) {
      scaled = clamped * 0x8000;
    } else {
      scaled = clamped * 0x7fff;
    }
    view.setInt16(index * BYTES_PER_SAMPLE, scaled, true);
  }

  return view;
}
/**
 * AudioWorklet processor that converts the first channel of its first input
 * to 16 kHz, 16-bit PCM and posts each converted chunk to the main thread as
 * `{ audioBuffer }`.
 */
class CustomProcessor extends AudioWorkletProcessor {
  /**
   * @param {Float32Array[][]} inputs - Inputs for this processing call; only
   *   the first channel of the first input is read.
   * @returns {boolean} Always `true`, so the processor keeps being called.
   */
  process(inputs) {
    const input = inputs[0];
    if (!input || input.length === 0) {
      // Nothing to convert on this call. Still return true: a falsy return
      // value allows the browser to stop calling this processor.
      return true;
    }
    const samples = input[0];
    // `sampleRate` is the global exposed by AudioWorkletGlobalScope; use it
    // instead of the 44100 default so resampling matches the context's rate.
    const output = to16kHz(samples, sampleRate);
    const audioBuffer = to16BitPCM(output);
    this.port.postMessage({ audioBuffer });
    return true;
  }
}
registerProcessor('custom-processor', CustomProcessor);