首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >如何训练Tf.js音频识别模型来识别3个以上的命令?

如何训练Tf.js音频识别模型来识别3个以上的命令?
EN

Stack Overflow用户
提问于 2020-05-19 03:52:18
回答 1查看 411关注 0票数 1

我在这里学习了Tensorflow.js音频识别教程:https://codelabs.developers.google.com/codelabs/tensorflowjs-audio-codelab/index.html?index=..%2F..index#5。我更改了命令,删除了滑块和函数moveSlider(),并简单地将标签显示在"console“目录中。你可以在这里找到我的代码:https://codepen.io/willrd123/pen/abvQbyG?editors=0010

代码语言:javascript
复制
<html>
  <head>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/speech-commands"></script>
  </head>
  <body>
    <button id="start" onmousedown="collect(0)">Start</button>
    <button id="forward" onmousedown="collect(1)">Forward</button>
    <button id="back" onmousedown="collect(2)">Back</button>
    <button id="left" onmousedown="collect(3)">Left</button>
    <button id="right" onmousedown="collect(4)">Right</button>
    <button id="up" onmousedown="collect(5)">Up</button>
    <button id="down" onmousedown="collect(6)">Down</button>
    <button id="stop" onmousedown="collect(7)">Stop</button>
    <button id="takeOff" onmousedown="collect(8)">Take Off</button>
    <button id="land" onmousedown="collect(9)">Land</button>
    <button id="flip" onmousedown="collect(10)">Flip</button>
    <button id="switchView" onmousedown="collect(11)">Switch View</button>
    <button id="noise" onmousedown="collect(12)">Noise</button>
    <br/><br/>
    <button id="train" onclick="train()">Train</button>
    <button id="listen" onclick="listen()">Listen</button>
    <button id="save" onclick="save()">Save</button>
    <br/><br/>
    <div id="console"></div>
    <script src="index.js"></script>
  </body>
</html>
代码语言:javascript
复制
let recognizer;

async function app() {
 recognizer = speechCommands.create('BROWSER_FFT');
 await recognizer.ensureModelLoaded();
 // Add this line.
 buildModel();
}

app();

// One frame is ~23ms of audio.
const NUM_FRAMES = 6;
let examples = [];

function collect(label) {
 if (recognizer.isListening()) {
   return recognizer.stopListening();
 }
 if (label == null) {
   return;
 }
 recognizer.listen(async ({spectrogram: {frameSize, data}}) => {
   let vals = normalize(data.subarray(-frameSize * NUM_FRAMES));
   examples.push({vals, label});
   document.querySelector('#console').textContent =
       `${examples.length} examples collected`;
 }, {
   overlapFactor: 0.999,
   includeSpectrogram: true,
   invokeCallbackOnNoiseAndUnknown: true
 });
}

function normalize(x) {
 const mean = -100;
 const std = 10;
 return x.map(x => (x - mean) / std);
}

const INPUT_SHAPE = [NUM_FRAMES, 232, 1];
let model;

async function train() {
 toggleButtons(false);
 const ys = tf.oneHot(examples.map(e => e.label), 3);
 const xsShape = [examples.length, ...INPUT_SHAPE];
 const xs = tf.tensor(flatten(examples.map(e => e.vals)), xsShape);

 await model.fit(xs, ys, {
   batchSize: 16,
   epochs: 10,
   callbacks: {
     onEpochEnd: (epoch, logs) => {
       document.querySelector('#console').textContent =
           `Accuracy: ${(logs.acc * 100).toFixed(1)}% Epoch: ${epoch + 1}`;
     }
   }
 });
 tf.dispose([xs, ys]);
 toggleButtons(true);
}

function buildModel() {
 model = tf.sequential();
 model.add(tf.layers.depthwiseConv2d({
   depthMultiplier: 8,
   kernelSize: [NUM_FRAMES,  3],
   activation: 'relu',
   inputShape: INPUT_SHAPE
 }));
 model.add(tf.layers.maxPooling2d({poolSize: [1, 2], strides: [2, 2]}));
 model.add(tf.layers.flatten());
 model.add(tf.layers.dense({units: 3, activation: 'softmax'}));
 const optimizer = tf.train.adam(0.01);
 model.compile({
   optimizer,
   loss: 'categoricalCrossentropy',
   metrics: ['accuracy']
 });
}

function toggleButtons(enable) {
 document.querySelectorAll('button').forEach(b => b.disabled = !enable);
}

function flatten(tensors) {
 const size = tensors[0].length;
 const result = new Float32Array(tensors.length * size);
 tensors.forEach((arr, i) => result.set(arr, i * size));
 return result;
}

var labels = ["Forward", "Back", "Left", "Right", "Up", "Down", "Take Off", "Land", "Switch View", "Noise"];
async function finish(labelTensor) {
 const label = (await labelTensor.data())[0];
 document.getElementById('console').textContent = labels[label];
} 

function listen() {
 if (recognizer.isListening()) {
   recognizer.stopListening();
   toggleButtons(true);
   document.getElementById('listen').textContent = 'Listen';
   return;
 }
 toggleButtons(false);
 document.getElementById('listen').textContent = 'Stop';
 document.getElementById('listen').disabled = false;

 recognizer.listen(async ({spectrogram: {frameSize, data}}) => {
   const vals = normalize(data.subarray(-frameSize * NUM_FRAMES));
   const input = tf.tensor(vals, [1, ...INPUT_SHAPE]);
   const probs = model.predict(input);
   const predLabel = probs.argMax(1);
   await finish(predLabel);
   tf.dispose([input, probs, predLabel]);
 }, {
   overlapFactor: 0.999,
   includeSpectrogram: true,
   invokeCallbackOnNoiseAndUnknown: true
 });
}

async function save () {
  const model = await tf.loadLayersModel(HTTP-Server/dronemodel.json');
}

然而,当我尝试将这段代码修改为13个不同的命令时,模型只返回了前3个命令(开始、前进和后退),即使我只为这3个命令之外的1个命令提供了音频,有什么方法可以解决这个问题吗?

EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2020-05-19 04:58:16

该模型在给定最后一层的单位3的情况下对三个类别进行分类。单元的数量必须更改为预期的命令数量(13),并且需要相应地训练模型。

票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/61877890

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档