# =============================================================================
# Auto-generated from literate documentation
# Do not edit directly - modify the source markdown instead
# =============================================================================
#
# ESPHome configuration for a voice assistant with an LED status ring and an
# SPI LCD. The device state is tracked in the `voice_assistant_phase` global;
# the `draw_display` and `control_leds` scripts render that state and are
# re-run from almost every event handler below.

# --- substitutions ---
substitutions:
  device_name: "${timer_area}-voice-assistant"
  friendly_name: "${timer_area} Voice Assistant"
  device_description: "Waveshare ESP32-S3-Audio Board"

  # REQUIRED: Set this to match your HA area
  timer_area: "kitchen"

  # Voice assistant phase IDs
  voice_assist_idle_phase_id: "1"
  voice_assist_listening_phase_id: "2"
  voice_assist_thinking_phase_id: "3"
  voice_assist_replying_phase_id: "4"
  voice_assist_not_ready_phase_id: "10"
  voice_assist_error_phase_id: "11"
  voice_assist_muted_phase_id: "12"
  voice_assist_timer_finished_phase_id: "20"
  voice_assist_ota_phase_id: "30"

  # Audio configuration
  i2s_mclk: GPIO12
  i2s_bclk: GPIO13
  i2s_lrclk: GPIO14
  i2c_scl: GPIO10
  i2c_sda: GPIO11
  amp_ctrl: "8"  # TCA9555 pin

  # LED ring
  led_num: "7"

  # Display configuration
  display_width: "240"
  display_height: "280"
  image_height: "240"
  display_clk_pin: GPIO4
  display_mosi_pin: GPIO9
  display_cs_pin: GPIO3
  display_dc_pin: GPIO7
  display_backlight_pin: GPIO5

  # Generic voice assistant images
  loading_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/loading_320_240.png
  idle_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/idle_320_240.png
  listening_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/listening_320_240.png
  thinking_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/thinking_320_240.png
  replying_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/replying_320_240.png
  error_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/error_320_240.png
  timer_finished_illustration_file: https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/timer_finished_320_240.png

  # Background colors
  loading_illustration_background_color: "000000"
  idle_illustration_background_color: "000000"
  listening_illustration_background_color: "FFFFFF"
  thinking_illustration_background_color: "FFFFFF"
  replying_illustration_background_color: "FFFFFF"
  error_illustration_background_color: "000000"
  timer_finished_illustration_background_color: "FFFFFF"

  # Font configuration
  font_glyphsets: "GF_Latin_Core"
  font_family: Figtree

# --- esphome ---
esphome:
  name: ${device_name}
  friendly_name: ${friendly_name}
  comment: ${device_description}
  min_version: 2025.5.0
  on_boot:
    - priority: 375
      then:
        - script.execute: control_leds
        - script.execute: draw_display
        # Fallback: if nothing has cleared the init flag within 30 s, clear it
        # here so the display and LEDs leave the "initializing" state.
        - delay: 30s
        - if:
            condition:
              lambda: return id(init_in_progress);
            then:
              - lambda: id(init_in_progress) = false;
              - script.execute: control_leds
              - script.execute: draw_display

# --- esp32 ---
esp32:
  board: esp32-s3-devkitc-1
  cpu_frequency: 240MHz
  variant: esp32s3
  flash_size: 16MB
  framework:
    type: esp-idf
    version: recommended
    sdkconfig_options:
      CONFIG_ESP32S3_DATA_CACHE_64KB: "y"
      CONFIG_ESP32S3_DATA_CACHE_LINE_64B: "y"
      CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB: "y"
      CONFIG_SPIRAM_RODATA: "y"
      CONFIG_SPIRAM_FETCH_INSTRUCTIONS: "y"

psram:
  mode: octal
  speed: 80MHz

external_components:
  - source:
      type: git
      url: "https://github.com/sw3Dan/waveshare-s2-audio_esphome_voice"
      ref: main
    components: [es8311]
    refresh: 0s

# --- api ---
api:
  id: api_id
  on_client_connected:
    - script.execute: control_leds
    - script.execute: draw_display
  on_client_disconnected:
    - script.execute: control_leds
    - script.execute: draw_display
  # Actions exposed to Home Assistant for driving the timer alarm and UI.
  services:
    - service: timer_finished
      then:
        - logger.log: "Timer finished! Playing alarm..."
        - switch.turn_on: timer_ringing
    - service: timer_started
      variables:
        duration: int
      then:
        - logger.log:
            format: "Timer started with duration: %d seconds"
            args: ["duration"]
        - script.execute: control_leds
        - script.execute: draw_display
    - service: timer_cancelled
      then:
        - logger.log: "Timer cancelled"
        - switch.turn_off: timer_ringing
        - script.execute: control_leds
        - script.execute: draw_display
    - service: stop_alarm
      then:
        - switch.turn_off: timer_ringing

# --- wifi ---
wifi:
  id: wifi_id
  ssid: !secret wifi_ssid
  password: !secret wifi_password
  on_connect:
    - script.execute: control_leds
    - script.execute: draw_display
  on_disconnect:
    - script.execute: control_leds
    - script.execute: draw_display

logger:
  level: INFO
  logs:
    light: WARN
    sensor: WARN
    component: ERROR

ota:
  - platform: esphome
    id: ota_esphome
    on_begin:
      - lambda: |-
          id(voice_assistant_phase) = ${voice_assist_ota_phase_id};
          id(ota_progress) = 0;
      - display.page.show: ota_page
      - component.update: lcd_display
    on_progress:
      - lambda: id(ota_progress) = (int)x;
      - component.update: lcd_display
    on_end:
      - lambda: id(ota_progress) = 100;
    on_error:
      - lambda: id(ota_progress) = -1;
      - display.page.show: error_page
      # The display uses `update_interval: never`, so the newly selected page
      # is only drawn when an update is requested explicitly.
      - component.update: lcd_display
      - delay: 5s
      # Leave the OTA phase before redrawing; otherwise draw_display would
      # switch straight back to ota_page (showing "OTA: -1%").
      - if:
          condition:
            voice_assistant.connected:
          then:
            - script.execute: set_idle_or_mute_phase
          else:
            - lambda: id(voice_assistant_phase) = ${voice_assist_not_ready_phase_id};
      - script.execute: control_leds
      - script.execute: draw_display

# --- interval ---
interval:
  # Every 30 s: while idle or muted with a timer running, refresh LEDs and
  # display.
  - interval: 30s
    then:
      - lambda: |-
          if (id(voice_assistant_phase) == ${voice_assist_idle_phase_id} ||
              id(voice_assistant_phase) == ${voice_assist_muted_phase_id}) {
            std::string state = id(timer_state).state;
            if (state == "active" || state == "paused") {
              ESP_LOGD("timer_sync", "Timer active but display idle - redrawing");
              id(control_leds).execute();
              id(draw_display).execute();
            }
          }
  # Every second: redraw the display while a timer has 60 s or less remaining.
  - interval: 1s
    then:
      - lambda: |-
          std::string state = id(timer_state).state;
          float remaining = id(timer_remaining).state;
          if ((state == "active" || state == "paused") && !std::isnan(remaining) && remaining <= 60) {
            id(draw_display).execute();
          }

# --- i2c ---
i2c:
  - id: internal_i2c
    sda: ${i2c_sda}
    scl: ${i2c_scl}
    scan: true
    frequency: 100kHz

tca9555:
  id: ioexp
  i2c_id: internal_i2c
  address: 0x20

# --- spi ---
spi:
  - id: display_spi
    clk_pin: ${display_clk_pin}
    mosi_pin: ${display_mosi_pin}

# --- audio ---
i2s_audio:
  - id: i2s_audio_bus
    i2s_lrclk_pin: ${i2s_lrclk}
    i2s_bclk_pin: ${i2s_bclk}
    i2s_mclk_pin: ${i2s_mclk}

audio_adc:
  - platform: es7210
    id: adc_mic
    i2c_id: internal_i2c
    bits_per_sample: 16bit

audio_dac:
  - platform: es8311
    id: es8311_dac
    i2c_id: internal_i2c
    bits_per_sample: 16bit

microphone:
  - platform: i2s_audio
    id: i2s_mics
    i2s_din_pin: GPIO15
    adc_type: external
    pdm: false
    bits_per_sample: 16bit

speaker:
  - platform: i2s_audio
    id: i2s_audio_speaker
    i2s_dout_pin: GPIO16
    dac_type: external
    bits_per_sample: 16bit
    channel: left
    audio_dac: es8311_dac

media_player:
  - platform: speaker
    id: external_media_player
    name: None
    volume_min: 0.4
    volume_max: 0.85
    announcement_pipeline:
      speaker: i2s_audio_speaker
      format: FLAC
      sample_rate: 48000
      num_channels: 1
    files:
      - id: timer_finished_sound
        file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/timer_finished.flac
      - id: wake_word_triggered_sound_file
        file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/wake_word_triggered.flac
    on_announcement:
      - script.execute: control_leds
    on_idle:
      - delay: 100ms
      # Only return to wake-word listening when no pipeline is running and the
      # timer alarm is not ringing.
      - if:
          condition:
            and:
              - not: voice_assistant.is_running
              - switch.is_off: timer_ringing
          then:
            - script.execute: start_wake_word
            - script.execute: set_idle_or_mute_phase
            - script.execute: control_leds
            - script.execute: draw_display

# --- wake_word ---
micro_wake_word:
  id: mww
  models:
    - model: okay_nabu
      id: okay_nabu
  vad:
    model: github://esphome/micro-wake-word-models/models/v2/vad.json
  on_wake_word_detected:
    - voice_assistant.start:
        wake_word: !lambda return wake_word;

voice_assistant:
  id: va
  microphone: i2s_mics
  media_player: external_media_player
  micro_wake_word: mww
  noise_suppression_level: 2
  auto_gain: 31dBFS
  volume_multiplier: 2.0
  on_listening:
    - lambda: id(voice_assistant_phase) = ${voice_assist_listening_phase_id};
    - script.execute: control_leds
    - script.execute: draw_display
  on_stt_vad_end:
    - lambda: id(voice_assistant_phase) = ${voice_assist_thinking_phase_id};
    - script.execute: control_leds
    - script.execute: draw_display
  on_tts_start:
    - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id};
    - script.execute: control_leds
    - script.execute: draw_display
  on_end:
    # Wait (at most 5 s) for the announcement and speaker to finish before
    # restarting wake-word detection.
    - wait_until:
        condition:
          and:
            - not: media_player.is_announcing
            - not: speaker.is_playing
        timeout: 5s
    - lambda: id(va).set_use_wake_word(false);
    - micro_wake_word.start:
    - script.execute: set_idle_or_mute_phase
    - script.execute: control_leds
    - script.execute: draw_display
  on_error:
    # Show the error phase for 1 s, then fall back to idle/muted. Skipped
    # while initialization is still in progress.
    - if:
        condition:
          lambda: return !id(init_in_progress);
        then:
          - lambda: id(voice_assistant_phase) = ${voice_assist_error_phase_id};
          - script.execute: control_leds
          - script.execute: draw_display
          - delay: 1s
          - script.execute: set_idle_or_mute_phase
          - script.execute: control_leds
          - script.execute: draw_display
  on_client_connected:
    - lambda: id(init_in_progress) = false;
    - script.execute: start_wake_word
    - script.execute: set_idle_or_mute_phase
    - script.execute: control_leds
    - script.execute: draw_display
  on_client_disconnected:
    - script.execute: stop_wake_word
    - lambda: id(voice_assistant_phase) = ${voice_assist_not_ready_phase_id};
    - script.execute: control_leds
    - script.execute: draw_display
  # Timer event stubs - HA handles actual timer logic
  on_timer_started:
    - logger.log: "Timer started (handled by HA)"
  on_timer_finished:
    - logger.log: "Timer finished (handled by HA automation)"
  on_timer_cancelled:
    - logger.log: "Timer cancelled (handled by HA)"
  on_timer_updated:
    - logger.log: "Timer updated (handled by HA)"
  on_timer_tick:
    - lambda: return;

# --- sensor ---
# All timer values are read from one Home Assistant entity,
# sensor.<timer_area>_timer_remaining_seconds, and its attributes.
sensor:
  - platform: homeassistant
    id: timer_remaining
    name: "Timer remaining"
    entity_id: sensor.${timer_area}_timer_remaining_seconds
    unit_of_measurement: "s"
    device_class: "duration"
    on_value:
      then:
        - script.execute: control_leds
        - script.execute: draw_display
  - platform: homeassistant
    id: timer_duration
    name: "Timer duration"
    entity_id: sensor.${timer_area}_timer_remaining_seconds
    attribute: duration_seconds
    unit_of_measurement: "s"
  - platform: homeassistant
    id: timer_progress
    entity_id: sensor.${timer_area}_timer_remaining_seconds
    attribute: progress_percent
    internal: true

text_sensor:
  # The lambdas in this file compare this value against "active" and "paused".
  - platform: homeassistant
    id: timer_state
    entity_id: sensor.${timer_area}_timer_remaining_seconds
    attribute: timer_state
    internal: true
    on_value:
      then:
        - script.execute: control_leds
        - script.execute: draw_display

# --- binary_sensor ---
binary_sensor:
  - platform: gpio
    pin:
      number: GPIO0
      mode: INPUT_PULLUP
      inverted: true
    id: boot_button
    internal: true
    on_multi_click:
      # Short press: silence the timer alarm.
      - timing:
          - ON for at least 50ms
          - OFF for at least 50ms
        then:
          - switch.turn_off: timer_ringing
      # Held for 10 s: factory reset.
      - timing:
          - ON for at least 10s
        then:
          - button.press: factory_reset_btn

button:
  - platform: restart
    id: restart_btn
    name: Restart
  - platform: factory_reset
    id: factory_reset_btn
    internal: true

# --- light ---
light:
  - platform: esp32_rmt_led_strip
    id: status_ring
    name: "Status Ring"
    pin: GPIO38
    num_leds: ${led_num}
    rgb_order: GRB
    chipset: WS2812
    rmt_symbols: 192
    restore_mode: ALWAYS_OFF
    default_transition_length: 0ms
    effects:
      # Lights a share of the ring proportional to remaining / duration;
      # green while active, blue while paused.
      - addressable_lambda:
          name: "Timer Progress"
          update_interval: 100ms
          lambda: |-
            std::string state = id(timer_state).state;
            float remaining = id(timer_remaining).state;
            float duration = id(timer_duration).state;
            int num_leds = it.size();
            int leds_to_light = 0;
            if ((state == "active" || state == "paused") && duration > 0 && !std::isnan(remaining)) {
              float progress = remaining / duration;
              leds_to_light = (int)(progress * num_leds + 0.5f);
            }
            Color active_color = (state == "paused") ? Color(0, 128, 255) : Color(0, 255, 0);
            for (int i = 0; i < num_leds; i++) {
              it[i] = (i < leds_to_light) ? active_color : Color::BLACK;
            }
      - addressable_scan:
          name: "Scan"
          move_interval: 50ms
          scan_width: 2
      - pulse:
          name: "Pulse Slow"
          transition_length: 1s
          update_interval: 1s
      - pulse:
          name: "Pulse Fast"
          transition_length: 250ms
          update_interval: 250ms

# --- switch ---
# NOTE(review): nothing in this file references backlight_output (no light
# entity and no output action) - confirm the backlight is driven as intended.
output:
  - platform: ledc
    pin: ${display_backlight_pin}
    id: backlight_output

switch:
  - platform: template
    id: mute
    name: "Mute"
    icon: "mdi:microphone-off"
    optimistic: true
    restore_mode: RESTORE_DEFAULT_OFF
    on_turn_off:
      - microphone.unmute:
      - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
      - script.execute: control_leds
      - script.execute: draw_display
    on_turn_on:
      - microphone.mute:
      - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
      - script.execute: control_leds
      - script.execute: draw_display
  # While on, the timer-finished sound is played on repeat as an announcement.
  - platform: template
    id: timer_ringing
    name: "Timer Ringing"
    icon: "mdi:bell-ring-outline"
    optimistic: true
    restore_mode: ALWAYS_OFF
    on_turn_off:
      - lambda: |-
          id(external_media_player)
            ->make_call()
            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_OFF)
            .set_announcement(true)
            .perform();
      - media_player.stop:
          announcement: true
      - script.execute: set_idle_or_mute_phase
      - script.execute: control_leds
      - script.execute: draw_display
    on_turn_on:
      - lambda: id(voice_assistant_phase) = ${voice_assist_timer_finished_phase_id};
      - script.execute: control_leds
      - script.execute: draw_display
      - lambda: |-
          id(external_media_player)
            ->make_call()
            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_ONE)
            .set_announcement(true)
            .perform();
      - media_player.speaker.play_on_device_media_file:
          media_file: timer_finished_sound
          announcement: true
      # Safety stop: silence the alarm after 15 minutes.
      - delay: 15min
      - switch.turn_off: timer_ringing

# --- globals ---
globals:
  # True from boot until the voice assistant client connects or the 30 s boot
  # fallback fires.
  - id: init_in_progress
    type: bool
    restore_value: false
    initial_value: "true"
  # Current UI phase; holds one of the voice_assist_*_phase_id substitutions.
  - id: voice_assistant_phase
    type: int
    restore_value: false
    initial_value: ${voice_assist_not_ready_phase_id}
  # OTA progress in percent; set to -1 by the OTA on_error handler.
  - id: ota_progress
    type: int
    restore_value: false
    initial_value: "0"
  - id: led_ring_color_r
    type: float
    restore_value: true
    initial_value: "1.0"
  - id: led_ring_color_g
    type: float
    restore_value: true
    initial_value: "0.0"
  - id: led_ring_color_b
    type: float
    restore_value: true
    initial_value: "1.0"

# --- script ---
script:
  # Select and render the display page. Checked in order: initializing,
  # no Wi-Fi, no API connection, then the page for the current phase.
  - id: draw_display
    then:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            - if:
                condition:
                  wifi.connected:
                then:
                  - if:
                      condition:
                        api.connected:
                      then:
                        - lambda: |-
                            switch(id(voice_assistant_phase)) {
                              case ${voice_assist_listening_phase_id}:
                                id(lcd_display).show_page(listening_page);
                                break;
                              case ${voice_assist_thinking_phase_id}:
                                id(lcd_display).show_page(thinking_page);
                                break;
                              case ${voice_assist_replying_phase_id}:
                                id(lcd_display).show_page(replying_page);
                                break;
                              case ${voice_assist_error_phase_id}:
                                id(lcd_display).show_page(error_page);
                                break;
                              case ${voice_assist_muted_phase_id}:
                                id(lcd_display).show_page(muted_page);
                                break;
                              case ${voice_assist_not_ready_phase_id}:
                                id(lcd_display).show_page(no_ha_page);
                                break;
                              case ${voice_assist_timer_finished_phase_id}:
                                id(lcd_display).show_page(timer_finished_page);
                                break;
                              case ${voice_assist_ota_phase_id}:
                                id(lcd_display).show_page(ota_page);
                                break;
                              default:
                                id(lcd_display).show_page(idle_page);
                            }
                            id(lcd_display).update();
                      else:
                        - display.page.show: no_ha_page
                        - component.update: lcd_display
                else:
                  - display.page.show: no_wifi_page
                  - component.update: lcd_display
          else:
            - display.page.show: initializing_page
            - component.update: lcd_display

  # Drive the LED ring. The first matching state wins: initializing, no Wi-Fi,
  # no API, timer ringing, timer running, then the voice assistant phase.
  - id: control_leds
    mode: single
    then:
      - lambda: |-
          const bool wifi_connected = id(wifi_id).is_connected();
          const bool api_connected = id(api_id).is_connected();
          const int phase = id(voice_assistant_phase);
          std::string timer_state_str = id(timer_state).state;
          bool timer_running = (timer_state_str == "active" || timer_state_str == "paused");

          if (id(init_in_progress)) {
            auto call = id(status_ring).turn_on();
            call.set_effect("Pulse Slow");
            call.set_rgb(1.0f, 0.0f, 1.0f);
            call.perform();
            return;
          }
          if (!wifi_connected) {
            auto call = id(status_ring).turn_on();
            call.set_effect("Pulse Fast");
            call.set_rgb(1.0f, 0.0f, 0.0f);
            call.perform();
            return;
          }
          if (!api_connected) {
            auto call = id(status_ring).turn_on();
            call.set_effect("Pulse Fast");
            call.set_rgb(1.0f, 0.2f, 0.0f);
            call.perform();
            return;
          }
          if (id(timer_ringing).state) {
            auto call = id(status_ring).turn_on();
            call.set_effect("Pulse Fast");
            call.set_rgb(1.0f, 0.0f, 0.0f);
            call.perform();
            return;
          }
          if (timer_running) {
            auto call = id(status_ring).turn_on();
            call.set_effect("Timer Progress");
            call.perform();
            return;
          }

          switch (phase) {
            case ${voice_assist_listening_phase_id}:
            case ${voice_assist_thinking_phase_id}:
            case ${voice_assist_replying_phase_id}: {
              auto call = id(status_ring).turn_on();
              call.set_effect("Scan");
              call.set_rgb(0.0f, 1.0f, 1.0f);
              call.perform();
              break;
            }
            case ${voice_assist_error_phase_id}: {
              auto call = id(status_ring).turn_on();
              call.set_rgb(1.0f, 0.0f, 0.0f);
              call.perform();
              break;
            }
            case ${voice_assist_muted_phase_id}: {
              auto call = id(status_ring).turn_on();
              call.set_rgb(0.0f, 0.0f, 1.0f);
              call.perform();
              break;
            }
            default:
              id(status_ring).turn_off().perform();
              break;
          }

  # Start wake-word detection unless a voice pipeline is already running.
  - id: start_wake_word
    then:
      - if:
          condition:
            not: voice_assistant.is_running
          then:
            - lambda: id(va).set_use_wake_word(false);
            - micro_wake_word.start:

  - id: stop_wake_word
    then:
      - micro_wake_word.stop:

  # Set the phase to idle, or to muted when the mute switch is on.
  - id: set_idle_or_mute_phase
    then:
      - if:
          condition:
            switch.is_off: mute
          then:
            - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
          else:
            - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};

# --- image ---
image:
  - file: ${error_illustration_file}
    id: casita_error
    resize: 240x240
    type: RGB
  - file: ${idle_illustration_file}
    id: casita_idle
    resize: 240x240
    type: RGB
  - file: ${listening_illustration_file}
    id: casita_listening
    resize: 240x240
    type: RGB
  - file: ${thinking_illustration_file}
    id: casita_thinking
    resize: 240x240
    type: RGB
  - file: ${replying_illustration_file}
    id: casita_replying
    resize: 240x240
    type: RGB
  - file: ${timer_finished_illustration_file}
    id: casita_timer_finished
    resize: 240x240
    type: RGB
  - file: ${loading_illustration_file}
    id: casita_initializing
    resize: 240x240
    type: RGB
  - file: https://github.com/esphome/wake-word-voice-assistants/raw/main/error_box_illustrations/error-no-wifi.png
    id: error_no_wifi
    resize: 240x240
    type: RGB
  - file: https://github.com/esphome/wake-word-voice-assistants/raw/main/error_box_illustrations/error-no-ha.png
    id: error_no_ha
    resize: 240x240
    type: RGB

# --- font ---
font:
  - file:
      type: gfonts
      family: ${font_family}
      weight: 400
    id: font_status
    size: 16
    glyphsets:
      - ${font_glyphsets}
  - file:
      type: gfonts
      family: ${font_family}
      weight: 700
    id: font_ota
    size: 20
    glyphsets:
      - ${font_glyphsets}

# --- color ---
# NOTE(review): only three of the *_illustration_background_color
# substitutions are used here; timer_finished_page fills with idle_color
# although timer_finished_illustration_background_color differs - confirm.
color:
  - id: idle_color
    hex: ${idle_illustration_background_color}
  - id: listening_color
    hex: ${listening_illustration_background_color}
  - id: error_color
    hex: ${error_illustration_background_color}

# --- display ---
display:
  - platform: mipi_spi
    id: lcd_display
    model: ST7789V
    dimensions:
      height: ${display_height}
      width: ${display_width}
      offset_height: 20
      offset_width: 0
    rotation: 0
    spi_id: display_spi
    cs_pin: ${display_cs_pin}
    dc_pin: ${display_dc_pin}
    reset_pin:
      tca9555: ioexp
      number: 0  # EXIO0 = LCD_RST
    invert_colors: true
    color_order: RGB
    data_rate: 40MHz
    # Never refreshed on a timer: every page change in this file is followed
    # by an explicit update of lcd_display.
    update_interval: never
    pages:
      - id: idle_page
        lambda: |-
          it.fill(id(idle_color));
          it.image(120, 0, id(casita_idle), ImageAlign::TOP_CENTER);

          // Timer progress bar at bottom
          std::string state = id(timer_state).state;
          if (state == "active" || state == "paused") {
            float remaining = id(timer_remaining).state;
            float duration = id(timer_duration).state;
            // Skip until both values are known; converting NaN to int is
            // undefined behaviour.
            if (duration > 0 && !std::isnan(remaining)) {
              // Clamp so the bar never extends past the 240 px wide track.
              float fraction = remaining / duration;
              if (fraction < 0.0f) fraction = 0.0f;
              if (fraction > 1.0f) fraction = 1.0f;
              int progress_width = (int)(240 * fraction);
              Color bar_color = (state == "paused") ? Color(0, 128, 255) : Color(0, 255, 0);
              it.filled_rectangle(0, 265, 240, 15, Color::WHITE);
              it.filled_rectangle(0, 266, progress_width, 13, bar_color);
            }
          }
      - id: listening_page
        lambda: |-
          it.fill(id(listening_color));
          it.image(120, 0, id(casita_listening), ImageAlign::TOP_CENTER);
      - id: thinking_page
        lambda: |-
          it.fill(id(listening_color));
          it.image(120, 0, id(casita_thinking), ImageAlign::TOP_CENTER);
      - id: replying_page
        lambda: |-
          it.fill(id(listening_color));
          it.image(120, 0, id(casita_replying), ImageAlign::TOP_CENTER);
      - id: timer_finished_page
        lambda: |-
          it.fill(id(idle_color));
          it.image(120, 0, id(casita_timer_finished), ImageAlign::TOP_CENTER);
      - id: error_page
        lambda: |-
          it.fill(id(error_color));
          it.image(120, 0, id(casita_error), ImageAlign::TOP_CENTER);
      - id: no_ha_page
        lambda: |-
          it.fill(Color::BLACK);
          it.image(120, 0, id(error_no_ha), ImageAlign::TOP_CENTER);
      - id: no_wifi_page
        lambda: |-
          it.fill(Color::BLACK);
          it.image(120, 0, id(error_no_wifi), ImageAlign::TOP_CENTER);
      - id: initializing_page
        lambda: |-
          it.fill(Color::BLACK);
          it.image(120, 0, id(casita_initializing), ImageAlign::TOP_CENTER);
          it.printf(120, 250, id(font_status), Color::WHITE, TextAlign::TOP_CENTER, "Initializing...");
      - id: muted_page
        lambda: |-
          it.fill(Color::BLACK);
          it.printf(120, 140, id(font_status), Color::WHITE, TextAlign::CENTER, "Microphone muted");
      - id: ota_page
        lambda: |-
          it.fill(id(error_color));
          it.image(120, 0, id(casita_error), ImageAlign::TOP_CENTER);
          it.filled_rectangle(10, 250, 220, 20, Color::WHITE);
          // A negative ota_progress (error) yields a non-positive width, so no bar is drawn.
          int progress_width = (id(ota_progress) * 216) / 100;
          if (progress_width > 0) {
            it.filled_rectangle(12, 252, progress_width, 16, Color(255, 100, 0));
          }
          it.printf(120, 232, id(font_ota), Color::WHITE, TextAlign::TOP_CENTER, "OTA: %d%%", id(ota_progress));