You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I am currently working on integrating Deepgram’s live audio transcription capabilities into my web application. My goal is to display interim (partial) transcription results in real-time as users speak into the microphone, providing immediate feedback before the final transcription is delivered.
Current Implementation
Below is my current client.js setup, which handles microphone access and WebSocket communication with the Deepgram server. While this implementation successfully captures and sends audio data to the server and displays the final transcription, it does not support displaying interim results in real-time.
`
const captions = window.document.getElementById("captions");
async function getMicrophone() {
try {
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
return new MediaRecorder(stream, { mimeType: "audio/webm" });
} catch (error) {
console.error("error accessing microphone:", error);
throw error;
}
}
async function openMicrophone(microphone, socket) {
return new Promise((resolve) => {
microphone.onstart = () => {
console.log("client: microphone opened");
document.body.classList.add("recording");
resolve();
};
microphone.onstop = () => {
console.log("client: microphone closed");
document.body.classList.remove("recording");
};
microphone.ondataavailable = (event) => {
console.log("client: microphone data received");
if (event.data.size > 0 && socket.readyState === WebSocket.OPEN) {
socket.send(event.data);
}
};
microphone.start(500);
});
}
async function closeMicrophone(microphone) {
microphone.stop();
}
async function start(socket) {
const listenButton = document.querySelector("#record");
let microphone;
console.log("client: waiting to open microphone");
listenButton.addEventListener("click", async () => {
if (!microphone) {
try {
microphone = await getMicrophone();
await openMicrophone(microphone, socket);
} catch (error) {
console.error("error opening microphone:", error);
}
} else {
await closeMicrophone(microphone);
microphone = undefined;
}
});
}
window.addEventListener("load", () => {
const socket = new WebSocket("ws://localhost:3000");
socket.addEventListener("open", async () => {
console.log("client: connected to server");
await start(socket);
});
socket.addEventListener("message", (event) => {
if (event.data === "") {
return;
}
let data;
try {
data = JSON.parse(event.data);
} catch (e) {
console.error("Failed to parse JSON:", e);
return;
}
if (data && data.channel && data.channel.alternatives[0].transcript !== "") {
// Append new transcript to the existing content
captions.innerHTML += data.channel.alternatives[0].transcript + " ";
captions.scrollTop = captions.scrollHeight; // Keep scrolling to the bottom
}
});
socket.addEventListener("close", () => {
console.log("client: disconnected from server");
});
});
`
I would like to modify this implementation to display interim transcription results in real-time as the user speaks. Specifically, I aim to:
1Receive partial transcripts from Deepgram while the audio is still being processed.
2Update the captions element with these interim results, providing immediate feedback to the user.
3Ensure that the final transcription replaces or completes the interim results once processing is complete.
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
Hello Deepgram Community,
I am currently working on integrating Deepgram’s live audio transcription capabilities into my web application. My goal is to display interim (partial) transcription results in real-time as users speak into the microphone, providing immediate feedback before the final transcription is delivered.
Current Implementation
Below is my current client.js setup, which handles microphone access and WebSocket communication with the Deepgram server. While this implementation successfully captures and sends audio data to the server and displays the final transcription, it does not support displaying interim results in real-time.
I would like to modify this implementation to display interim transcription results in real-time as the user speaks. Specifically, I aim to:
1Receive partial transcripts from Deepgram while the audio is still being processed.
2Update the captions element with these interim results, providing immediate feedback to the user.
3Ensure that the final transcription replaces or completes the interim results once processing is complete.
Beta Was this translation helpful? Give feedback.
All reactions