// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

namespace Azure.AI.VoiceLive.Samples;

using System.Net.WebSockets;

/// <summary>
/// Voice assistant that uses MCP (Model Context Protocol) servers to give the model
/// access to external tools and data sources via the VoiceLive MCP API.
/// Requires API version 2026-01-01-preview.
/// </summary>
public class MCPVoiceAssistant : IDisposable
{
    private readonly VoiceLiveClient _client;
    private readonly string _model;
    private readonly string _voice;
    private readonly string _instructions;
    private readonly ILogger _logger;
    private readonly ILoggerFactory _loggerFactory;
    private VoiceLiveSession? _session;
    private AudioProcessor? _audioProcessor;
    private bool _disposed;

    /// <summary>
    /// Initializes a new instance of the <see cref="MCPVoiceAssistant"/> class.
    /// </summary>
    /// <param name="client">Connected <see cref="VoiceLiveClient"/> used to start the session.</param>
    /// <param name="model">Model identifier to run the conversation against.</param>
    /// <param name="voice">Azure standard voice name used for spoken responses.</param>
    /// <param name="instructions">System instructions configured on the session.</param>
    /// <param name="loggerFactory">Factory used to create loggers for this class and the audio processor.</param>
    /// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
    public MCPVoiceAssistant(
        VoiceLiveClient client,
        string model,
        string voice,
        string instructions,
        ILoggerFactory loggerFactory)
    {
        _client = client ?? throw new ArgumentNullException(nameof(client));
        _model = model ?? throw new ArgumentNullException(nameof(model));
        _voice = voice ?? throw new ArgumentNullException(nameof(voice));
        _instructions = instructions ?? throw new ArgumentNullException(nameof(instructions));
        _loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
        // FIX: ILoggerFactory has no parameterless CreateLogger overload; the generic
        // type argument (stripped during formatting) is required here.
        _logger = loggerFactory.CreateLogger<MCPVoiceAssistant>();
    }

    /// <summary>
    /// Connects to VoiceLive, configures the session with MCP tools, starts audio
    /// playback/capture, and processes server events until cancellation.
    /// </summary>
    /// <param name="cancellationToken">Token used to stop the assistant.</param>
    public async Task StartAsync(CancellationToken cancellationToken = default)
    {
        try
        {
            _logger.LogInformation("Connecting to VoiceLive with MCP tools, model: {Model}", _model);

            _session = await _client.StartSessionAsync(_model, cancellationToken).ConfigureAwait(false);
            _audioProcessor = new AudioProcessor(_session, _loggerFactory.CreateLogger<AudioProcessor>());

            await SetupSessionAsync(cancellationToken).ConfigureAwait(false);

            await _audioProcessor.StartPlaybackAsync().ConfigureAwait(false);
            await _audioProcessor.StartCaptureAsync().ConfigureAwait(false);

            Console.WriteLine();
            Console.WriteLine(new string('=', 60));
            Console.WriteLine("MCP VOICE ASSISTANT READY");
            Console.WriteLine("MCP tools: deepwiki, azure_doc");
            Console.WriteLine("Start speaking to begin conversation");
            Console.WriteLine("Press Ctrl+C to exit");
            Console.WriteLine(new string('=', 60));
            Console.WriteLine();

            await ProcessEventsAsync(cancellationToken).ConfigureAwait(false);
        }
        catch (OperationCanceledException)
        {
            _logger.LogInformation("Shutting down...");
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Connection error");
            throw;
        }
        finally
        {
            if (_audioProcessor != null)
            {
                await _audioProcessor.CleanupAsync().ConfigureAwait(false);
            }
        }
    }

    /// <summary>
    /// Configures the live session: audio formats, echo cancellation, noise reduction,
    /// server VAD turn detection, input transcription, and the MCP server tools.
    /// </summary>
    /// <param name="cancellationToken">Token that cancels session configuration.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown when the service rejects the MCP session configuration.
    /// </exception>
    private async Task SetupSessionAsync(CancellationToken cancellationToken)
    {
        var sessionOptions = new VoiceLiveSessionOptions
        {
            Model = _model,
            Instructions = _instructions,
            Voice = new AzureStandardVoice(_voice),
            InputAudioFormat = InputAudioFormat.Pcm16,
            OutputAudioFormat = OutputAudioFormat.Pcm16,
            InputAudioEchoCancellation = new AudioEchoCancellation(),
            InputAudioNoiseReduction = new AudioNoiseReduction(AudioNoiseReductionType.NearField),
            TurnDetection = new ServerVadTurnDetection
            {
                Threshold = 0.5f,
                PrefixPadding = TimeSpan.FromMilliseconds(300),
                SilenceDuration = TimeSpan.FromMilliseconds(500)
            },
            // Whisper1 is the most broadly available transcription model across resources.
            InputAudioTranscription = new AudioInputTranscriptionOptions(AudioInputTranscriptionOptionsModel.Whisper1)
        };

        sessionOptions.Modalities.Clear();
        sessionOptions.Modalities.Add(InteractionModality.Text);
        sessionOptions.Modalities.Add(InteractionModality.Audio);

        // Register MCP servers — both set to never require approval since
        // MCPApprovalResponseRequestItem is not available for sending approvals from client code.
        sessionOptions.Tools.Add(new VoiceLiveMcpServerDefinition("deepwiki", "https://mcp.deepwiki.com/mcp")
        {
            RequireApproval = MCPApprovalType.Never,
            AllowedTools = { "read_wiki_structure", "ask_question" }
        });
        sessionOptions.Tools.Add(new VoiceLiveMcpServerDefinition("azure_doc", "https://learn.microsoft.com/api/mcp")
        {
            RequireApproval = MCPApprovalType.Never
        });

        try
        {
            await _session!.ConfigureSessionAsync(sessionOptions, cancellationToken).ConfigureAwait(false);
            _logger.LogInformation("Session configured with {ToolCount} MCP server(s)", sessionOptions.Tools.Count);
        }
        catch (WebSocketException ex)
        {
            _logger.LogError(ex, "MCP session configuration failed. The endpoint/model may not support MCP configuration or the requested options.");
            throw new InvalidOperationException(
                "MCP session setup was rejected by the service. Verify this resource supports VoiceLive MCP with API version 2026-01-01-preview, and that the configured model/transcription options are available.",
                ex);
        }
    }

    /// <summary>
    /// Streams session updates from the service and dispatches each one to
    /// <see cref="HandleUpdateAsync"/> until the token is cancelled.
    /// </summary>
    /// <param name="cancellationToken">Token that ends the event loop.</param>
    private async Task ProcessEventsAsync(CancellationToken cancellationToken)
    {
        try
        {
            await foreach (SessionUpdate update in _session!.GetUpdatesAsync(cancellationToken).ConfigureAwait(false))
            {
                await HandleUpdateAsync(update, cancellationToken).ConfigureAwait(false);
            }
        }
        catch (OperationCanceledException)
        {
            _logger.LogInformation("Event processing cancelled");
        }
    }

    /// <summary>
    /// Handles a single server event: session lifecycle, MCP tool discovery and calls,
    /// user speech/transcription, assistant audio, and errors.
    /// </summary>
    /// <param name="update">The event received from the service.</param>
    /// <param name="cancellationToken">Token forwarded to cancellation-aware calls.</param>
    private async Task HandleUpdateAsync(SessionUpdate update, CancellationToken cancellationToken)
    {
        _logger.LogDebug("Received event: {EventType}", update.GetType().Name);

        switch (update)
        {
            case SessionUpdateSessionCreated sessionCreated:
                _logger.LogInformation("Session ready: {SessionId}", sessionCreated.Session?.Id);
                break;

            case SessionUpdateSessionUpdated:
                _logger.LogInformation("Session updated");
                break;

            // MCP tool discovery events
            case SessionUpdateMcpListToolsInProgress inProgress:
                _logger.LogInformation("Discovering MCP tools (item: {ItemId})", inProgress.ItemId);
                Console.WriteLine("[MCP] Discovering tools from MCP server...");
                break;

            case SessionUpdateMcpListToolsCompleted completed:
                _logger.LogInformation("MCP tools loaded (item: {ItemId})", completed.ItemId);
                Console.WriteLine("[MCP] MCP tools ready");
                break;

            case SessionUpdateMcpListToolsFailed failed:
                _logger.LogWarning("Failed to load MCP tools (item: {ItemId})", failed.ItemId);
                Console.WriteLine("[MCP] Failed to load MCP tools");
                break;

            // MCP tool call events
            case SessionUpdateResponseMcpCallInProgress callInProgress:
                _logger.LogInformation("MCP call in progress (item: {ItemId})", callInProgress.ItemId);
                Console.WriteLine("[MCP] Calling MCP tool...");
                break;

            case SessionUpdateResponseMcpCallCompleted callCompleted:
                _logger.LogInformation("MCP call completed (item: {ItemId})", callCompleted.ItemId);
                Console.WriteLine("[MCP] Tool call completed");
                break;

            case SessionUpdateResponseMcpCallFailed callFailed:
                _logger.LogWarning("MCP call failed (item: {ItemId})", callFailed.ItemId);
                Console.WriteLine("[MCP] Tool call failed");
                break;

            case SessionUpdateConversationItemInputAudioTranscriptionCompleted transcription:
                Console.WriteLine($"[User]: {transcription.Transcript}");
                _logger.LogInformation("User: {Transcript}", transcription.Transcript);
                break;

            case SessionUpdateInputAudioBufferSpeechStarted:
                Console.WriteLine("Listening...");
                // Barge-in: stop playback and cancel any in-flight response so the
                // user can interrupt the assistant mid-answer.
                if (_audioProcessor != null)
                {
                    await _audioProcessor.StopPlaybackAsync().ConfigureAwait(false);
                }
                try
                {
                    await _session!.CancelResponseAsync(cancellationToken).ConfigureAwait(false);
                }
                catch (Exception ex)
                {
                    // Best-effort: there may simply be no active response to cancel.
                    _logger.LogDebug(ex, "No response to cancel");
                }
                break;

            case SessionUpdateInputAudioBufferSpeechStopped:
                Console.WriteLine("Processing...");
                if (_audioProcessor != null)
                {
                    await _audioProcessor.StartPlaybackAsync().ConfigureAwait(false);
                }
                break;

            case SessionUpdateResponseCreated:
                _logger.LogInformation("Response started");
                break;

            case SessionUpdateResponseAudioTranscriptDone transcriptDone:
                if (!string.IsNullOrEmpty(transcriptDone.Transcript))
                {
                    Console.WriteLine($"[Assistant]: {transcriptDone.Transcript}");
                    _logger.LogInformation("Assistant: {Transcript}", transcriptDone.Transcript);
                }
                break;

            case SessionUpdateResponseAudioDelta audioDelta:
                if (audioDelta.Delta != null && _audioProcessor != null)
                {
                    await _audioProcessor.QueueAudioAsync(audioDelta.Delta.ToArray()).ConfigureAwait(false);
                }
                break;

            case SessionUpdateResponseAudioDone:
                _logger.LogInformation("Assistant finished speaking");
                Console.WriteLine("Ready...");
                break;

            case SessionUpdateResponseDone:
                _logger.LogInformation("Response complete");
                break;

            case SessionUpdateError errorEvent:
                _logger.LogError("Error: {ErrorMessage}", errorEvent.Error?.Message);
                Console.WriteLine($"Error: {errorEvent.Error?.Message}");
                break;
        }
    }

    /// <summary>
    /// Disposes the audio processor and the active session, if any.
    /// </summary>
    public void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        _audioProcessor?.Dispose();
        _session?.Dispose();
        _disposed = true;
        // CA1816: suppress finalization on a public, non-sealed IDisposable.
        GC.SuppressFinalize(this);
    }
}