# 语音识别

机器学习如何处理语音信号呢?声音在计算机中可以表示为声谱图(本质上是一张图像),因此可以像处理图像一样使用卷积神经网络:模型接收声音信息,输出分类结果。

# 操作步骤

  • 加载预训练语音识别模型
import * as speechCommands from '@tensorflow-models/speech-commands';

// When the page has finished loading, create a speech-commands recognizer
// from the model and metadata URLs below, then wait until its model is loaded.
window.onload = async () => {
  const modelUrl = 'xxx/model.json';
  const metadataUrl = 'xxx/metadata.json';
  const recognizer = speechCommands.create('BROWSER_FFT', null, modelUrl, metadataUrl);

  await recognizer.ensureModelLoaded();
};
  • 进行语音识别
// Start streaming recognition. For every result, log the label whose score is
// the highest.
recognizer.listen((result) => {
  const { scores } = result;
  // The labels come from the recognizer itself so that they line up with the
  // positions in `scores` (the original referenced an undefined `labels`).
  const labels = recognizer.wordLabels();
  const maxValue = Math.max(...scores);
  const index = scores.indexOf(maxValue);
  console.log(labels[index]);
}, {
  // Recognition frequency
  overlapFactor: 0.3,
  // Require 75% similarity (probability) before reporting a result
  probabilityThreshold: 0.75,
});

# 迁移学习

  • 在浏览器中收集中文语音
// Create a transfer-learning recognizer from the base recognizer.
// Declared with `const`: this file is an ES module (it uses `import`), where
// assigning to an undeclared identifier throws a ReferenceError.
const transferRecognizer = recognizer.createTransfer('收集声音');
  • 使用 speech commands 包进行迁移学习并预测
// Collect one example for `label` and wait for the collection to finish.
await transferRecognizer.collectExample(label);
  • 训练
// Train the transfer recognizer for 30 epochs on the collected examples.
await transferRecognizer.train({
  epochs: 30,
  // `callback` must be an object of training hooks (e.g. `onEpochEnd`), not a
  // bare function: a plain function carries no hooks and is never called.
  callback: {
    onEpochEnd: async (epoch, logs) => {
      // e.g. visualize training progress here
    },
  },
});
  • 保存训练数据
// Serialize the collected examples and hand them to the user as a file
// download named `data.bin`, using a temporary detached <a> element.
const arrayBuffer = transferRecognizer.serializeExamples();
const blob = new Blob([arrayBuffer]);
const link = document.createElement('a');
link.href = window.URL.createObjectURL(blob);
link.download = 'data.bin';
link.click();
// Release the object URL after the click has been dispatched; otherwise the
// Blob stays referenced until the page is unloaded.
setTimeout(() => window.URL.revokeObjectURL(link.href), 0);