/*! p5.speech.js v0.0.1 2015-06-12 */ /* updated v0.0.2 2017-10-17 */ /* updated v0.0.3 2022.1.7 */ /** * @module p5.speech * @submodule p5.speech * @for p5.speech * @main */ /** * p5.speech * R. Luke DuBois (dubois@nyu.edu) * ABILITY Lab / Integrated Design & Media * New York University * The MIT License (MIT). * * https://github.com/IDMNYU/p5.js-speech * * Web Speech API: https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html * Web Speech Recognition API: https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html */ (function (root, factory) { if (typeof define === 'function' && define.amd) define('p5.speech', ['p5'], function (p5) { (factory(p5));}); else if (typeof exports === 'object') factory(require('../p5')); else factory(root['p5']); }(this, function (p5) { // ============================================================================= // p5.Speech // ============================================================================= /** * Base class for a Speech Synthesizer * * @class p5.Speech * @constructor */ p5.Speech = function(_dv, _callback) { // // speech synthesizers consist of a single synthesis engine // per window instance, and a variable number of 'utterance' // objects, which can be cached and re-used for, e.g. // auditory UI. // // this implementation assumes a monolithic (one synth, // one phrase at a time) system. // // make a speech synthizer (this will load voices): this.synth = window.speechSynthesis; // make an utterance to use with this synthesizer: this.utterance = new SpeechSynthesisUtterance(); this.isLoaded = 0; // do we have voices yet? // do we queue new utterances upon firing speak() // or interrupt what's speaking: this.interrupt = false; // callback properties to be filled in within the p5 sketch // if the author needs custom callbacks: this.onLoad; // fires when voices are loaded and synth is ready this.onStart; // fires when an utterance begins... this.onPause; // ...is paused... this.onResume; // ...resumes... this.onEnd; // ...and ends. this.voices = []; // array of available voices (dependent on browser/OS) // first parameter of constructor is an initial voice selector this.initvoice; if(_dv !== undefined) this.initvoice=_dv; if(_callback !== undefined) this.onLoad =_callback; var that = this; // aliasing 'this' into a fixed variable // onvoiceschanged() fires automatically when the synthesizer // is configured and has its voices loaded. you don't need // to wait for this if you're okay with the default voice. // // we use this function to load the voice array and bind our // custom callback functions. window.speechSynthesis.onvoiceschanged = function() { if(that.isLoaded==0) { // run only once that.voices = window.speechSynthesis.getVoices(); that.isLoaded = 1; // we're ready console.log("p5.Speech: voices loaded!"); if(that.initvoice!=undefined) { that.setVoice(that.initvoice); // set a custom initial voice console.log("p5.Speech: initial voice: " + that.initvoice); } // fire custom onLoad() callback, if it exists: if(that.onLoad!=undefined) that.onLoad(); // // bind other custom callbacks: // that.utterance.onstart = function(e) { //console.log("STARTED"); if(that.onStart!=undefined) that.onStart(e); }; that.utterance.onpause = function(e) { //console.log("PAUSED"); if(that.onPause!=undefined) that.onPause(e); }; that.utterance.onresume = function(e) { //console.log("RESUMED"); if(that.onResume!=undefined) that.onResume(e); }; that.utterance.onend = function(e) { //console.log("ENDED"); if(that.onEnd!=undefined) that.onEnd(e); }; } }; }; // end p5.Speech constructor // listVoices() - dump voice names to javascript console: p5.Speech.prototype.listVoices = function() { if(this.isLoaded) { for(var i = 0;i0.0-2.0. voice will crash out of bounds. p5.Speech.prototype.setPitch = function(_v) { this.utterance.pitch = Math.min(Math.max(_v, 0.01), 2.0); }; // sets the language of the voice. p5.Speech.prototype.setLang = function(_lang) { this.utterance.lang = _lang; } // speak a phrase through the current synthesizer: p5.Speech.prototype.speak = function(_phrase) { if(this.interrupt) this.synth.cancel(); this.utterance.text = _phrase; this.synth.speak(this.utterance); }; // not working... p5.Speech.prototype.pause = function() { this.synth.pause(); }; // not working... p5.Speech.prototype.resume = function() { this.synth.resume(); }; // stop current utterance: p5.Speech.prototype.stop = function() { // not working... //this.synth.stop(); this.synth.cancel(); }; // kill synthesizer completely, clearing any queued utterances: p5.Speech.prototype.cancel = function() { this.synth.cancel(); // KILL SYNTH }; // Setting callbacks with functions instead p5.Speech.prototype.started = function(_cb) { this.onStart = _cb; } p5.Speech.prototype.ended = function(_cb) { this.onEnd = _cb; } p5.Speech.prototype.paused = function(_cb) { this.onPause = _cb; } p5.Speech.prototype.resumed = function(_cb) { this.onResume = _cb; } // ============================================================================= // p5.SpeechRec // ============================================================================= /** * Base class for a Speech Recognizer * * @class p5.SpeechRec * @constructor */ p5.SpeechRec = function(_lang, _callback) { // // speech recognition consists of a recognizer object per // window instance that returns a JSON object containing // recognition. this JSON object grows when the synthesizer // is in 'continuous' mode, with new recognized phrases // appended into an internal array. // // this implementation returns the full JSON, but also a set // of simple, query-ready properties containing the most // recently recognized speech. // // make a recognizer object. if('webkitSpeechRecognition' in window) { this.rec = new (window.SpeechRecognition || window.webkitSpeechRecognition || window.mozSpeechRecognition || window.msSpeechRecognition)(); } else { this.rec = new Object(); console.log("p5.SpeechRec: Speech Recognition not supported in this browser."); } // first parameter is language model (defaults to empty=U.S. English) // no list of valid models in API, but it must use BCP-47. // here's some hints: // http://stackoverflow.com/questions/14257598/what-are-language-codes-for-voice-recognition-languages-in-chromes-implementati if(_lang !== undefined) this.rec.lang=_lang; // callback properties to be filled in within the p5 sketch // if the author needs custom callbacks: this.onResult; // fires when something has been recognized this.onStart; // fires when the recognition system is started... this.onError; // ...has a problem (e.g. the mic is shut off)... this.onEnd; // ...and ends (in non-continuous mode). if(_callback !== undefined) this.onResult=_callback; // recognizer properties: // continous mode means the object keeps recognizing speech, // appending new tokens to the internal JSON. this.continuous = false; // interimResults means the object will report (i.e. fire its // onresult() callback) more frequently, rather than at pauses // in microphone input. this gets you quicker, but less accurate, // results. this.interimResults = false; // result data: // resultJSON: // this is a full JSON returned by onresult(). it consists of a // SpeechRecognitionEvent object, which contains a (wait for it) // SpeechRecognitionResultList. this is an array. in continuous // mode, it will be appended to, not cleared. each element is a // SpeechRecognition result, which contains a (groan) // SpeechRecognitionAlternative, containing a 'transcript' property. // the 'transcript' is the recognized phrase. have fun. this.resultJSON; // resultValue: // validation flag which indicates whether the recognizer succeeded. // this is *not* a metric of speech clarity, but rather whether the // speech recognition system successfully connected to and received // a response from the server. you can construct an if() around this // if you're feeling worried. this.resultValue; // resultString: // the 'transcript' of the most recently recognized speech as a simple // string. this will be blown out and replaced at every firing of the // onresult() callback. this.resultString; // resultConfidence: // the 'confidence' (0-1) of the most recently recognized speech, e.g. // that it reflects what was actually spoken. you can use this to filter // out potentially bogus recognition tokens. this.resultConfidence; var that = this; // aliasing 'this' into a fixed variable // onresult() fires automatically when the recognition engine // detects speech, or times out trying. // // it fills up a JSON array internal to the webkitSpeechRecognition // object. we reference it over in our struct here, and also copy // out the most recently detected phrase and confidence value. this.rec.onresult = function(e) { that.resultJSON = e; // full JSON of callback event that.resultValue = e.returnValue; // was successful? // store latest result in top-level object struct that.resultString = e.results[e.results.length-1][0].transcript.trim(); that.resultConfidence = e.results[e.results.length-1][0].confidence; if(that.onResult!=undefined) that.onResult(); }; // fires when the recognition system starts (i.e. when you 'allow' // the mic to be used in the browser). this.rec.onstart = function(e) { if(that.onStart!=undefined) that.onStart(e); }; // fires on a client-side error (server-side errors are expressed // by the resultValue in the JSON coming back as 'false'). this.rec.onerror = function(e) { if(that.onError!=undefined) that.onError(e); }; // fires when the recognition finishes, in non-continuous mode. this.rec.onend = function() { if(that.onEnd!=undefined) that.onEnd(); }; }; // end p5.SpeechRec constructor // start the speech recognition engine. this will prompt a // security dialog in the browser asking for permission to // use the microphone. this permission will persist throughout // this one 'start' cycle. if you need to recognize speech more // than once, use continuous mode rather than firing start() // multiple times in a single script. p5.SpeechRec.prototype.start = function(_continuous, _interim) { if('webkitSpeechRecognition' in window) { if(_continuous !== undefined) this.continuous = _continuous; if(_interim !== undefined) this.interimResults = _interim; this.rec.continuous = this.continuous; this.rec.interimResults = this.interimResults; this.rec.start(); } }; // Add function to stop the speech recognition from continued listening p5.SpeechRec.prototype.stop = function() { if('webkitSpeechRecognition' in window) { this.rec.stop(); } }; })); /* todo: * fix callbacks (pause, resume) in synthesizer. * support speech grammar models for scoped auditory UI. * support markdown, boundaries, etc for better synthesis tracking. * support utterance parser for long phrases. */ // EOF