From deca696eb9e83ded488afe4463e2686d6353cd4d Mon Sep 17 00:00:00 2001 From: hexins Date: Thu, 12 Oct 2023 09:13:01 +0000 Subject: [PATCH] change names for streaming and non-streaming server --- python-api-examples/http_server.py | 14 +- python-api-examples/non_streaming_server.py | 4 +- python-api-examples/streaming_server.py | 22 +- ...n_streaming.js => non_streaming_record.js} | 0 ...n_streaming.js => non_streaming_upload.js} | 0 .../web/js/offline_record_streaming.js | 424 ------------------ ...pload_streaming.js => streaming_upload.js} | 0 ...reaming.html => non_streaming_record.html} | 2 +- ...reaming.html => non_streaming_upload.html} | 2 +- .../web/offline_record_non_streaming.html | 81 ---- ...d_streaming.html => streaming_upload.html} | 2 +- 11 files changed, 30 insertions(+), 521 deletions(-) rename python-api-examples/web/js/{offline_record_non_streaming.js => non_streaming_record.js} (100%) rename python-api-examples/web/js/{upload_non_streaming.js => non_streaming_upload.js} (100%) delete mode 100644 python-api-examples/web/js/offline_record_streaming.js rename python-api-examples/web/js/{upload_streaming.js => streaming_upload.js} (100%) rename python-api-examples/web/{offline_record_streaming.html => non_streaming_record.html} (97%) rename python-api-examples/web/{upload_non_streaming.html => non_streaming_upload.html} (97%) delete mode 100644 python-api-examples/web/offline_record_non_streaming.html rename python-api-examples/web/{upload_streaming.html => streaming_upload.html} (97%) diff --git a/python-api-examples/http_server.py b/python-api-examples/http_server.py index 715d38259..91fa0366a 100644 --- a/python-api-examples/http_server.py +++ b/python-api-examples/http_server.py @@ -23,20 +23,18 @@ ("/js/bootstrap.min.js", "application/javascript"), ("/js/bootstrap.min.js.map", "application/javascript"), ("/js/jquery-3.6.0.min.js", "application/javascript"), - ("/js/offline_record_streaming.js", "application/javascript"), - ("/js/offline_record_non_streaming.js", "application/javascript"), + ("/js/non_streaming_record.js", "application/javascript"), ("/js/popper.min.js", "application/javascript"), ("/js/popper.min.js.map", "application/javascript"), ("/js/streaming_record.js", "application/javascript"), - ("/js/upload_streaming.js", "application/javascript"), - ("/js/upload_non_streaming.js", "application/javascript"), + ("/js/streaming_upload.js", "application/javascript"), + ("/js/non_streaming_upload.js", "application/javascript"), ("/k2-logo.png", "image/png"), ("/nav-partial.html", "text/html"), - ("/offline_record_streaming.html", "text/html"), - ("/offline_record_non_streaming.html", "text/html"), + ("/non_streaming_record.html", "text/html"), ("/streaming_record.html", "text/html"), - ("/upload_streaming.html", "text/html"), - ("/upload_non_streaming.html", "text/html"), + ("/streaming_upload.html", "text/html"), + ("/non_streaming_upload.html", "text/html"), ) _404_page = r""" diff --git a/python-api-examples/non_streaming_server.py b/python-api-examples/non_streaming_server.py index 389bfbf30..99899b798 100755 --- a/python-api-examples/non_streaming_server.py +++ b/python-api-examples/non_streaming_server.py @@ -537,9 +537,9 @@ async def process_request( if path == "/": path = "/index.html" if path == "/upload.html": - path = "/upload_non_streaming.html" + path = "/non_streaming_upload.html" if path == "/offline_record.html": - path = "/offline_record_non_streaming.html" + path = "/non_streaming_record.html" if path[-1] == "?": path = path[:-1] diff --git a/python-api-examples/streaming_server.py b/python-api-examples/streaming_server.py index e12c6eeca..ba17f200a 100755 --- a/python-api-examples/streaming_server.py +++ b/python-api-examples/streaming_server.py @@ -528,10 +528,26 @@ async def process_request( if path == "/": path = "/index.html" if path == "/upload.html": - path = "/upload_streaming.html" + path = "/streaming_upload.html" if path == "/offline_record.html": - path = "/offline_record_streaming.html" - found, response, mime_type = self.http_server.process_request(path) + response = r""" + +Speech recognition with next-gen Kaldi +

Only +/upload.html +and +/streaming_record.html +is available for the streaming server.

+
+
+Go back to /upload.html +or /streaming_record.html + +""" + found = True + mime_type = "text/html" + else: + found, response, mime_type = self.http_server.process_request(path) if isinstance(response, str): response = response.encode("utf-8") diff --git a/python-api-examples/web/js/offline_record_non_streaming.js b/python-api-examples/web/js/non_streaming_record.js similarity index 100% rename from python-api-examples/web/js/offline_record_non_streaming.js rename to python-api-examples/web/js/non_streaming_record.js diff --git a/python-api-examples/web/js/upload_non_streaming.js b/python-api-examples/web/js/non_streaming_upload.js similarity index 100% rename from python-api-examples/web/js/upload_non_streaming.js rename to python-api-examples/web/js/non_streaming_upload.js diff --git a/python-api-examples/web/js/offline_record_streaming.js b/python-api-examples/web/js/offline_record_streaming.js deleted file mode 100644 index b8f19f71b..000000000 --- a/python-api-examples/web/js/offline_record_streaming.js +++ /dev/null @@ -1,424 +0,0 @@ -// This file copies and modifies code -// from https://mdn.github.io/web-dictaphone/scripts/app.js -// and https://gist.github.com/meziantou/edb7217fddfbb70e899e - -var socket; -var recognition_text = []; - -function getDisplayResult() { - let i = 0; - let ans = ''; - for (let s in recognition_text) { - if (recognition_text[s] == '') continue; - - ans += '' + i + ': ' + recognition_text[s] + '\n'; - i += 1; - } - return ans; -} - -const serverIpInput = document.getElementById('server-ip'); -const serverPortInput = document.getElementById('server-port'); - -const connectBtn = document.getElementById('connect'); -const uploadBtn = document.getElementById('file'); - -function initWebSocket() { - let protocol = 'ws://'; - if (window.location.protocol == 'https:') { - protocol = 'wss://' - } - let server_ip = serverIpInput.value; - let server_port = serverPortInput.value; - console.log('protocol: ', protocol); - console.log('server_ip: ', server_ip); - console.log('server_port: ', server_port); - - let uri = protocol + server_ip + ':' + server_port; - console.log('uri', uri); - socket = new WebSocket(uri); - - // Connection opened - socket.addEventListener('open', function(event) { - console.log('connected'); - recordBtn.disabled = false; - connectBtn.disabled = true; - connectBtn.innerHTML = 'Connected!'; - }); - - // Connection closed - socket.addEventListener('close', function(event) { - console.log('disconnected'); - recordBtn.disabled = true; - stopBtn.disabled = true; - connectBtn.disabled = false; - connectBtn.innerHTML = 'Click me to connect!'; - }); - - // Listen for messages - socket.addEventListener('message', function(event) { - let message = JSON.parse(event.data); - if (message.segment in recognition_text) { - recognition_text[message.segment] = message.text; - } else { - recognition_text.push(message.text); - } - let text_area = document.getElementById('results'); - text_area.value = getDisplayResult(); - text_area.scrollTop = text_area.scrollHeight; // auto scroll - console.log('Received message: ', event.data); - - /* - document.getElementById('results').value = event.data; - socket.send('Done'); - console.log('Sent Done'); - socket.close(); - */ - }); -} - -const recordBtn = document.getElementById('offline_record'); -const stopBtn = document.getElementById('offline_stop'); -const clearBtn = document.getElementById('clear'); -const soundClips = document.getElementById('sound-clips'); -const canvas = document.getElementById('canvas'); -const mainSection = document.querySelector('.container'); - -recordBtn.disabled = true; -stopBtn.disabled = true; - -window.onload = (event) => { - console.log('page is fully loaded'); - console.log('protocol', window.location.protocol); - console.log('port', window.location.port); - if (window.location.protocol == 'https:') { - document.getElementById('ws-protocol').textContent = 'wss://'; - } - serverIpInput.value = window.location.hostname; - serverPortInput.value = window.location.port; -}; - -connectBtn.onclick = function() { - initWebSocket(); -}; - - -let audioCtx; -const canvasCtx = canvas.getContext('2d'); -let mediaStream; -let analyser; - -let expectedSampleRate = 16000; -let recordSampleRate; // the sampleRate of the microphone -let recorder = null; // the microphone -let leftchannel = []; // TODO: Use a single channel - -let recordingLength = 0; // number of samples so far - -clearBtn.onclick = function() { - document.getElementById('results').value = ''; - recognition_text = []; -}; - -function send_header(n) { - const header = new ArrayBuffer(8); - new DataView(header).setInt32(0, expectedSampleRate, true /* littleEndian */); - new DataView(header).setInt32(4, n, true /* littleEndian */); - socket.send(new Int32Array(header, 0, 2)); -} - -// copied/modified from https://mdn.github.io/web-dictaphone/ -// and -// https://gist.github.com/meziantou/edb7217fddfbb70e899e -if (navigator.mediaDevices.getUserMedia) { - console.log('getUserMedia supported.'); - - // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia - const constraints = {audio: true}; - - let onSuccess = function(stream) { - if (!audioCtx) { - audioCtx = new AudioContext(); - } - console.log(audioCtx); - recordSampleRate = audioCtx.sampleRate; - console.log('sample rate ' + recordSampleRate); - - // creates an audio node from the microphone incoming stream - mediaStream = audioCtx.createMediaStreamSource(stream); - console.log(mediaStream); - - // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor - // bufferSize: the onaudioprocess event is called when the buffer is full - var bufferSize = 2048; - var numberOfInputChannels = 2; - var numberOfOutputChannels = 2; - if (audioCtx.createScriptProcessor) { - recorder = audioCtx.createScriptProcessor( - bufferSize, numberOfInputChannels, numberOfOutputChannels); - } else { - recorder = audioCtx.createJavaScriptNode( - bufferSize, numberOfInputChannels, numberOfOutputChannels); - } - console.log(recorder); - - recorder.onaudioprocess = function(e) { - let samples = new Float32Array(e.inputBuffer.getChannelData(0)) - samples = downsampleBuffer(samples, expectedSampleRate); - let buf = new Int16Array(samples.length); - for (var i = 0; i < samples.length; ++i) { - let s = samples[i]; - if (s >= 1) - s = 1; - else if (s <= -1) - s = -1; - buf[i] = s * 32767; - } - leftchannel.push(buf); - recordingLength += bufferSize; - }; - - visualize(stream); - mediaStream.connect(analyser); - - recordBtn.onclick = function() { - mediaStream.connect(recorder); - mediaStream.connect(analyser); - recorder.connect(audioCtx.destination); - - console.log('recorder started'); - recordBtn.style.background = 'red'; - - stopBtn.disabled = false; - recordBtn.disabled = true; - }; - - stopBtn.onclick = function() { - console.log('recorder stopped'); - - // stopBtn recording - recorder.disconnect(audioCtx.destination); - mediaStream.disconnect(recorder); - mediaStream.disconnect(analyser); - - recordBtn.style.background = ''; - recordBtn.style.color = ''; - // mediaRecorder.requestData(); - - stopBtn.disabled = true; - recordBtn.disabled = false; - - const clipName = - prompt('Enter a name for your sound clip?', 'My unnamed clip'); - - const clipContainer = document.createElement('article'); - const clipLabel = document.createElement('p'); - const audio = document.createElement('audio'); - const deleteButton = document.createElement('button'); - clipContainer.classList.add('clip'); - audio.setAttribute('controls', ''); - deleteButton.textContent = 'Delete'; - deleteButton.className = 'delete'; - - if (clipName === null) { - clipLabel.textContent = 'My unnamed clip'; - } else { - clipLabel.textContent = clipName; - } - - clipContainer.appendChild(audio); - - clipContainer.appendChild(clipLabel); - clipContainer.appendChild(deleteButton); - soundClips.appendChild(clipContainer); - - audio.controls = true; - let samples = flatten(leftchannel); - let buf = new Float32Array(samples.length); - for (var i = 0; i < samples.length; ++i) { - let s = samples[i]; - buf[i] = s / 32767.0; - } - const blob = toWav(samples); - - leftchannel = []; - const audioURL = window.URL.createObjectURL(blob); - audio.src = audioURL; - console.log('recorder stopped'); - - deleteButton.onclick = function(e) { - let evtTgt = e.target; - evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode); - }; - - clipLabel.onclick = function() { - const existingName = clipLabel.textContent; - const newClipName = prompt('Enter a new name for your sound clip?'); - if (newClipName === null) { - clipLabel.textContent = existingName; - } else { - clipLabel.textContent = newClipName; - } - }; - - buf = buf.buffer - - let n = 1024 * 4; // send this number of bytes per request. - console.log('buf length, ' + buf.byteLength); - send_header(buf.byteLength); - - for (let start = 0; start < buf.byteLength; start += n) { - socket.send(buf.slice(start, start + n)); - } - socket.send('Done'); - console.log('Sent Done'); - }; - }; - - let onError = function(err) { - console.log('The following error occured: ' + err); - }; - - navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError); -} else { - console.log('getUserMedia not supported on your browser!'); - alert('getUserMedia not supported on your browser!'); -} - -function visualize(stream) { - if (!audioCtx) { - audioCtx = new AudioContext(); - } - - const source = audioCtx.createMediaStreamSource(stream); - - if (!analyser) { - analyser = audioCtx.createAnalyser(); - analyser.fftSize = 2048; - } - const bufferLength = analyser.frequencyBinCount; - const dataArray = new Uint8Array(bufferLength); - - // source.connect(analyser); - // analyser.connect(audioCtx.destination); - - draw() - - function draw() { - const WIDTH = canvas.width - const HEIGHT = canvas.height; - - requestAnimationFrame(draw); - - analyser.getByteTimeDomainData(dataArray); - - canvasCtx.fillStyle = 'rgb(200, 200, 200)'; - canvasCtx.fillRect(0, 0, WIDTH, HEIGHT); - - canvasCtx.lineWidth = 2; - canvasCtx.strokeStyle = 'rgb(0, 0, 0)'; - - canvasCtx.beginPath(); - - let sliceWidth = WIDTH * 1.0 / bufferLength; - let x = 0; - - for (let i = 0; i < bufferLength; i++) { - let v = dataArray[i] / 128.0; - let y = v * HEIGHT / 2; - - if (i === 0) { - canvasCtx.moveTo(x, y); - } else { - canvasCtx.lineTo(x, y); - } - - x += sliceWidth; - } - - canvasCtx.lineTo(canvas.width, canvas.height / 2); - canvasCtx.stroke(); - } -} - -window.onresize = function() { - canvas.width = mainSection.offsetWidth; -}; - -window.onresize(); - -// this function is copied/modified from -// https://gist.github.com/meziantou/edb7217fddfbb70e899e -function flatten(listOfSamples) { - let n = 0; - for (let i = 0; i < listOfSamples.length; ++i) { - n += listOfSamples[i].length; - } - let ans = new Int16Array(n); - - let offset = 0; - for (let i = 0; i < listOfSamples.length; ++i) { - ans.set(listOfSamples[i], offset); - offset += listOfSamples[i].length; - } - return ans; -} - -// this function is copied/modified from -// https://gist.github.com/meziantou/edb7217fddfbb70e899e -function toWav(samples) { - let buf = new ArrayBuffer(44 + samples.length * 2); - var view = new DataView(buf); - - // http://soundfile.sapp.org/doc/WaveFormat/ - // F F I R - view.setUint32(0, 0x46464952, true); // chunkID - view.setUint32(4, 36 + samples.length * 2, true); // chunkSize - // E V A W - view.setUint32(8, 0x45564157, true); // format - // - // t m f - view.setUint32(12, 0x20746d66, true); // subchunk1ID - view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM - view.setUint32(20, 1, true); // audioFormat, 1 for PCM - view.setUint16(22, 1, true); // numChannels: 1 channel - view.setUint32(24, expectedSampleRate, true); // sampleRate - view.setUint32(28, expectedSampleRate * 2, true); // byteRate - view.setUint16(32, 2, true); // blockAlign - view.setUint16(34, 16, true); // bitsPerSample - view.setUint32(36, 0x61746164, true); // Subchunk2ID - view.setUint32(40, samples.length * 2, true); // subchunk2Size - - let offset = 44; - for (let i = 0; i < samples.length; ++i) { - view.setInt16(offset, samples[i], true); - offset += 2; - } - - return new Blob([view], {type: 'audio/wav'}); -} - -// this function is copied from -// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46 -function downsampleBuffer(buffer, exportSampleRate) { - if (exportSampleRate === recordSampleRate) { - return buffer; - } - var sampleRateRatio = recordSampleRate / exportSampleRate; - var newLength = Math.round(buffer.length / sampleRateRatio); - var result = new Float32Array(newLength); - var offsetResult = 0; - var offsetBuffer = 0; - while (offsetResult < result.length) { - var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio); - var accum = 0, count = 0; - for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) { - accum += buffer[i]; - count++; - } - result[offsetResult] = accum / count; - offsetResult++; - offsetBuffer = nextOffsetBuffer; - } - return result; -}; diff --git a/python-api-examples/web/js/upload_streaming.js b/python-api-examples/web/js/streaming_upload.js similarity index 100% rename from python-api-examples/web/js/upload_streaming.js rename to python-api-examples/web/js/streaming_upload.js diff --git a/python-api-examples/web/offline_record_streaming.html b/python-api-examples/web/non_streaming_record.html similarity index 97% rename from python-api-examples/web/offline_record_streaming.html rename to python-api-examples/web/non_streaming_record.html index 592df07f4..a26c07a29 100644 --- a/python-api-examples/web/offline_record_streaming.html +++ b/python-api-examples/web/non_streaming_record.html @@ -76,6 +76,6 @@

Recognition from offline recordings

crossorigin="anonymous"> - + diff --git a/python-api-examples/web/upload_non_streaming.html b/python-api-examples/web/non_streaming_upload.html similarity index 97% rename from python-api-examples/web/upload_non_streaming.html rename to python-api-examples/web/non_streaming_upload.html index 5cf274a57..feb0c2944 100644 --- a/python-api-examples/web/upload_non_streaming.html +++ b/python-api-examples/web/non_streaming_upload.html @@ -63,6 +63,6 @@

Recognition from a selected file

crossorigin="anonymous"> - + diff --git a/python-api-examples/web/offline_record_non_streaming.html b/python-api-examples/web/offline_record_non_streaming.html deleted file mode 100644 index b633172ba..000000000 --- a/python-api-examples/web/offline_record_non_streaming.html +++ /dev/null @@ -1,81 +0,0 @@ - - - - - - - - - - - - - - Next-gen Kaldi demo (Upload file for recognition) - - - - - - - -

Recognition from offline recordings

-
-
-
- -
- ws:// - - : - -
- -
-
- -
-
-
-
- -
-
- -
-
-
- -
- - -
- - - -
-
- - - - - - - - - - - diff --git a/python-api-examples/web/upload_streaming.html b/python-api-examples/web/streaming_upload.html similarity index 97% rename from python-api-examples/web/upload_streaming.html rename to python-api-examples/web/streaming_upload.html index 1d1226a31..0321b6090 100644 --- a/python-api-examples/web/upload_streaming.html +++ b/python-api-examples/web/streaming_upload.html @@ -63,6 +63,6 @@

Recognition from a selected file

crossorigin="anonymous"> - +