# 语音识别
机器学习如何处理语音信号呢?声音在计算机中可以转换为声谱图(本质上是一张图像),因此语音识别也可以当作图像处理问题,使用卷积神经网络来解决:模型接收声音信息,输出分类结果。
# 操作步骤
- 加载预训练语音识别模型
import * as speechCommands from '@tensorflow-models/speech-commands';
// Once the page has loaded, create a recognizer backed by the custom model
// files and wait until the model is ready to use.
window.onload = async () => {
  const modelUrl = 'xxx/model.json';
  const metadataUrl = 'xxx/metadata.json';
  // The vocabulary argument is left null because explicit model and
  // metadata URLs are supplied instead.
  const recognizer = speechCommands.create('BROWSER_FFT', null, modelUrl, metadataUrl);
  await recognizer.ensureModelLoaded();
};
- 进行语音识别
// Word labels of the loaded model; `scores` in each result is indexed in the
// same order, so the position of the best score identifies the label.
const labels = recognizer.wordLabels();
// Start listening; each time a result arrives, log the highest-scoring label.
recognizer.listen(result => {
  const { scores } = result;
  const maxValue = Math.max(...scores);
  const index = scores.indexOf(maxValue);
  console.log(labels[index]);
}, {
  // Recognition frequency
  overlapFactor: 0.3,
  // Require a 75% match before reporting a result
  probabilityThreshold: 0.75,
});
# 迁移学习
- 在浏览器中收集中文语音
// Derive a transfer recognizer from the base recognizer; '收集声音' is the
// name passed to createTransfer.
// NOTE(review): transferRecognizer is assigned here without a visible
// declaration — presumably declared elsewhere (e.g. `let transferRecognizer;`); confirm.
transferRecognizer = recognizer.createTransfer('收集声音');
- 使用 speech commands 包进行迁移学习并预测
// Collect one example for the given label and wait for the collection to finish.
await transferRecognizer.collectExample(label);
- 训练
// Train the transfer model on the collected examples for 30 epochs.
await transferRecognizer.train({
  epochs: 30,
  // `callback` takes an object of training hooks (as in the speech-commands
  // README), not a bare function.
  callback: {
    onEpochEnd: (epoch, logs) => {
      // e.g. update a visualization from `epoch` and `logs`
    },
  },
});
- 保存训练数据
// Serialize the collected examples and offer them to the user as a
// downloadable file named 'data.bin'.
const arrayBuffer = transferRecognizer.serializeExamples();
const blob = new Blob([arrayBuffer]);
const url = window.URL.createObjectURL(blob);
const link = document.createElement('a');
link.href = url;
link.download = 'data.bin';
// Programmatically click the link to trigger the download.
link.click();
// Release the object URL once the download has been handed to the browser;
// otherwise the blob stays referenced until the page is unloaded.
setTimeout(() => window.URL.revokeObjectURL(url), 0);
← 机器学习