// ==UserScript==
// @name chatGPT speechToText
// @namespace https://github.com/LawOff/chatGPT-Snippets
// @version 0.1
// @description Convert spoken words into written text with a single click.
// @author LawOff
// @match https://chat.openai.com/*
// @icon https://www.google.com/s2/favicons?sz=64&domain=openai.com
// ==/UserScript==
/**
 * Adds a microphone button next to the prompt textarea and writes whatever the
 * browser's speech recognition service hears into that textarea. When the first
 * recognized phrase becomes final, recognition is stopped and, if `autoSend`
 * is true, the form's first button (the send button) is clicked.
 *
 * All page elements are located through hard-coded XPath expressions, so the
 * constructor throws a descriptive Error when the expected elements or the
 * Web Speech API are not available.
 */
class SpeechToText {
  /**
   * XPath of the element that wraps the prompt textarea and its buttons.
   * @returns {string}
   */
  static get COMPOSER_XPATH() {
    return '//*[@id="__next"]/div/div[1]/main/div[2]/form/div/div[2]';
  }

  /**
   * Classes that are present on the microphone button while it is clickable
   * and removed while it is disabled.
   * @returns {string[]}
   */
  static get HOVER_CLASSES() {
    return [
      "hover:bg-gray-100",
      "dark:hover:text-gray-400",
      "dark:hover:bg-gray-900",
      "disabled:hover:bg-transparent",
      "dark:disabled:hover:bg-transparent",
    ];
  }

  /**
   * Returns the first node matching `xpath` in the document.
   * @param {string} xpath - XPath expression evaluated against `document`.
   * @returns {Node|null} The matching node, or null when nothing matches.
   */
  static findNode(xpath) {
    return document.evaluate(
      xpath,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    ).singleNodeValue;
  }

  /**
   * Locates the textarea, configures the recognizer and injects the button.
   * @throws {Error} If speech recognition is unsupported or the textarea /
   *   its container cannot be found.
   */
  constructor() {
    const Recognition =
      window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!Recognition) {
      throw new Error(
        "SpeechToText: the Web Speech API (SpeechRecognition) is not available in this browser."
      );
    }
    const textarea = SpeechToText.findNode(
      `${SpeechToText.COMPOSER_XPATH}/textarea`
    );
    if (!textarea) {
      throw new Error(
        "SpeechToText: could not find the prompt textarea; the page layout may have changed."
      );
    }

    this.autoSend = true;
    // True only between a click on the microphone button and the end of that
    // recognition session; used to ignore late "result" events.
    this.isListening = false;
    this.micButton = null;

    this.textarea = textarea;
    // Leave room on the right of the textarea for the extra button.
    this.textarea.classList.add("pr-10");
    this.textarea.style.paddingRight = "50px";

    this.recognition = new Recognition();
    this.recognition.lang = "en-US";
    this.recognition.continuous = true;
    this.recognition.interimResults = true;
    this.recognition.maxAlternatives = 1;
    this.recognition.addEventListener("result", this.handleResults.bind(this));
    this.recognition.addEventListener("error", this.handleError.bind(this));
    this.recognition.addEventListener("end", this.handleEnd.bind(this));

    this.addButton();
  }

  /**
   * Handles a recognition "result" event: writes the concatenated transcript
   * into the textarea and finishes the session once the first result is final.
   * @param {SpeechRecognitionEvent} event
   */
  handleResults(event) {
    // Results may still be delivered after stop(); without this guard they
    // would overwrite the textarea and click the send button a second time.
    if (!this.isListening) {
      return;
    }
    const transcript = Array.from(event.results)
      .map((result) => result[0].transcript)
      .join("");
    this.textarea.focus();
    this.textarea.value = transcript;
    // Notify the page's own listeners that the textarea content changed.
    this.textarea.dispatchEvent(new Event("input", { bubbles: true }));
    if (event.results.length > 0 && event.results[0].isFinal) {
      this.isListening = false;
      this.recognition.stop();
      this.enableButton();
    }
  }

  /**
   * Logs a recognition error. The button is restored by the "end" handler.
   * @param {Event} event - Recognition "error" event; its `error` is logged.
   */
  handleError(event) {
    console.error("SpeechToText: speech recognition error:", event.error);
  }

  /**
   * Handles the recognition "end" event by making the microphone button
   * clickable again, so a session that ended without a final result does not
   * leave the button disabled.
   */
  handleEnd() {
    this.isListening = false;
    this.restoreButton();
  }

  /**
   * Clicks the send button when `autoSend` is true, then re-enables the
   * microphone button.
   */
  enableButton() {
    // Automatic send.
    if (this.autoSend) {
      const sendButton = SpeechToText.findNode(
        `${SpeechToText.COMPOSER_XPATH}/button[1]`
      );
      if (sendButton) {
        sendButton.click();
      }
    }
    this.restoreButton();
  }

  /**
   * Re-enables the microphone button created by addButton() without sending.
   * Does nothing if the button has not been created yet.
   */
  restoreButton() {
    const button = this.micButton;
    if (!button) {
      return;
    }
    // NOTE(review): the markup assigned here is an empty string in this file —
    // presumably icon markup was intended; confirm.
    button.innerHTML =
      '';
    button.disabled = false;
    button.classList.add(...SpeechToText.HOVER_CLASSES);
  }

  /**
   * Disables `button` and removes its hover styling.
   * @param {HTMLButtonElement} button - The button to disable.
   */
  disableButton(button) {
    // NOTE(review): empty markup here as well — presumably a "busy" icon was
    // intended; confirm.
    button.innerHTML =
      '';
    button.disabled = true;
    button.classList.remove(...SpeechToText.HOVER_CLASSES);
  }

  /**
   * Creates the microphone button, wires its click handler and appends it to
   * the element that wraps the textarea.
   * @throws {Error} If the wrapping element cannot be found.
   */
  addButton() {
    const container = SpeechToText.findNode(SpeechToText.COMPOSER_XPATH);
    if (!container) {
      throw new Error(
        "SpeechToText: could not find the prompt container; the page layout may have changed."
      );
    }
    const button = document.createElement("button");
    // The button lives inside a <form>; make sure it can never submit it.
    button.type = "button";
    button.classList.add(
      "absolute",
      "p-1",
      "rounded-md",
      "text-gray-500",
      "bottom-1.5",
      "right-1",
      "md:bottom-2.5",
      "md:right-2",
      ...SpeechToText.HOVER_CLASSES
    );
    // NOTE(review): empty markup — presumably a microphone icon was intended;
    // confirm.
    button.innerHTML =
      '';
    // Shift the button left so it does not cover the send button.
    button.style.right = "35px";
    button.addEventListener("click", (e) => {
      // Prevent the default action first so that a failure below cannot
      // result in a form submission.
      e.preventDefault();
      this.disableButton(button);
      this.isListening = true;
      try {
        this.recognition.start();
      } catch (err) {
        // start() throws if recognition is already running; do not leave the
        // button stuck in the disabled state.
        this.isListening = false;
        this.restoreButton();
        console.error("SpeechToText: could not start speech recognition:", err);
      }
    });
    this.micButton = button;
    container.appendChild(button);
  }
}
// Creates the SpeechToText instance once the page has finished loading.
// A userscript can be injected after the window "load" event has already
// fired; a listener registered at that point would never run, so the document
// ready state is checked first and the listener is only used as a fallback.
const initSpeechToText = () => {
  // Instantiated for its side effects only (it injects the button itself).
  new SpeechToText();
};
if (document.readyState === "complete") {
  initSpeechToText();
} else {
  window.addEventListener("load", initSpeechToText, { once: true });
}