import 'dotenv/config'

import { Readable } from 'node:stream'

import microphone from 'mic'
import { RealtimeClient } from 'openai-realtime-api'
import Speaker from 'speaker'

/**
 * Simple Node.js demo using the `RealtimeClient` with a microphone and speaker
 * to simulate a full, back & forth conversation from the terminal.
 *
 * Flow: microphone audio is streamed to the client in fixed-size chunks; when
 * the microphone stream reports silence, a response is requested explicitly
 * (the session is configured with `turn_detection: null`); the audio of each
 * completed assistant message is then played through the speaker.
 */
async function main() {
  const client = new RealtimeClient({
    debug: false,
    sessionConfig: {
      instructions:
        'Please follow the instructions of any query you receive.\n' +
        'Be concise in your responses. Speak quickly and answer shortly.',
      turn_detection: null
    }
  })

  await client.connect()
  await client.waitForSessionCreated()

  // Tail of the playback chain. Each response is appended to it so that
  // responses are played one after another instead of on top of each other.
  let playbackQueue: Promise<void> = Promise.resolve()

  client.on('conversation.item.completed', ({ item }) => {
    // Log the item without its `formatted` payload.
    const { formatted: _, ...rest } = item
    console.log('Conversation item completed:', rest)

    if (
      item.type === 'message' &&
      item.role === 'assistant' &&
      item.formatted &&
      item.formatted.audio
    ) {
      console.log(
        `Playing audio response... \n"${item.formatted.transcript}"`
      )
      playAudio(item.formatted.audio)
    }
  })

  startAudioStream()

  /**
   * Starts the microphone and forwards its audio to the realtime client.
   *
   * Incoming data is accumulated and sent in chunks of `chunkSize` bytes via
   * `client.appendInputAudio`. A `'silence'` event from the microphone stream
   * triggers `client.createResponse()`. Errors are logged, never thrown.
   */
  function startAudioStream() {
    try {
      const recorder = microphone({
        rate: '24000',
        channels: '1',
        debug: false,
        exitOnSilence: 6,
        fileType: 'raw',
        encoding: 'signed-integer'
      })

      const micInputStream = recorder.getAudioStream()

      micInputStream.on('error', (error: unknown) => {
        console.error('Microphone error:', error)
      })

      recorder.start()
      console.log('Microphone started streaming.')

      let audioBuffer = Buffer.alloc(0)
      // 4800 bytes, read below as 16-bit samples: 2400 mono samples,
      // i.e. 0.1 seconds of audio at 24kHz.
      const chunkSize = 4800

      micInputStream.on('data', (data: Buffer) => {
        audioBuffer = Buffer.concat([audioBuffer, data])

        // Send every complete chunk; keep the remainder for the next event.
        while (audioBuffer.length >= chunkSize) {
          const chunk = audioBuffer.subarray(0, chunkSize)
          audioBuffer = audioBuffer.subarray(chunkSize)

          // View the chunk's bytes as 16-bit samples without copying.
          const int16Array = new Int16Array(
            chunk.buffer,
            chunk.byteOffset,
            chunk.length / 2
          )

          try {
            client.appendInputAudio(int16Array)
          } catch (err) {
            console.error('Error sending audio data:', err)
          }
        }
      })

      micInputStream.on('silence', () => {
        console.log('Silence detected, creating response...')
        try {
          client.createResponse()
        } catch (err) {
          console.error('Error creating response:', err)
        }
      })
    } catch (err) {
      console.error('Error starting audio stream:', err)
    }
  }

  /**
   * Queues 16-bit PCM audio for playback through the speaker.
   *
   * Returns immediately; playback starts once all previously queued audio
   * has finished. Errors are logged, never thrown.
   *
   * @param audioData - Samples to play. Only the region covered by the view
   *   is played, even if its underlying ArrayBuffer is larger.
   */
  function playAudio(audioData: Int16Array) {
    try {
      // Honor the view's offset and length: the Int16Array may cover only a
      // part of its backing ArrayBuffer.
      const pcm = Buffer.from(
        audioData.buffer,
        audioData.byteOffset,
        audioData.byteLength
      )
      playbackQueue = playbackQueue.then(() => playBuffer(pcm))
    } catch (err) {
      console.error('Error playing audio:', err)
    }
  }

  /**
   * Plays one PCM buffer on a dedicated `Speaker` instance.
   *
   * A fresh speaker is created per buffer because piping a finite stream
   * into a speaker ends it, so an instance cannot be reused for later audio.
   *
   * @returns A promise that resolves (never rejects) once the speaker has
   *   closed or reported an error, so the playback queue always advances.
   */
  function playBuffer(pcm: Buffer): Promise<void> {
    return new Promise<void>((resolve) => {
      try {
        const speaker = new Speaker({
          channels: 1,
          bitDepth: 16,
          sampleRate: client.conversation.frequency
        })

        speaker.once('close', () => resolve())
        // Without an 'error' listener a stream error would crash the process.
        speaker.on('error', (err: unknown) => {
          console.error('Error playing audio:', err)
          resolve()
        })

        // Pipe the audio stream to the speaker
        Readable.from([pcm]).pipe(speaker)
        console.log(
          'Audio sent to speaker for playback. Buffer length:',
          pcm.length
        )
      } catch (err) {
        console.error('Error playing audio:', err)
        resolve()
      }
    })
  }
}

await main()