Espressif ESP32-S3-BOX-3 Voice Assistant¶
Complete configuration for the Espressif ESP32-S3-BOX-3 with persistent Home Assistant timers.
Hardware Overview¶
| Feature | Value |
|---|---|
| Board | ESP32-S3 with PSRAM |
| Display | 320x240 ILI9341/ILI9xxx LCD |
| Touch | GT911 Capacitive |
| Audio | ES7210 ADC + ES8311 DAC |
| Wake Word | On-device micro_wake_word |
| Additional | Mute button, presence sensor, battery monitoring |
Prerequisites¶
- ESPHome 2025.5.0 or newer
- Home Assistant with:
  - A timer entity matching your area (e.g., `timer.playroom`)
  - A template sensor for the timer's remaining seconds
  - Intent scripts for timer control
  - A timer-finished automation
Complete Configuration¶
Download the complete ESPHome configuration file:
Download esp32-s3-box-3-voice-assistant.yaml
Configuration¶
Substitutions¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: substitutions
substitutions:
  # REQUIRED: Set this to match your HA area. The value is spliced into the
  # device name below and into the timer sensor entity IDs.
  timer_area: "playroom"

  # Device identity
  device_name: "${timer_area}-voice-assistant"
  friendly_name: "${timer_area} Voice Assistant"
  device_description: "ESP32-S3-BOX-3"

  # Generic voice assistant images (one full-screen illustration per state)
  loading_illustration_file: "https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/loading_320_240.png"
  idle_illustration_file: "https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/idle_320_240.png"
  listening_illustration_file: "https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/listening_320_240.png"
  thinking_illustration_file: "https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/thinking_320_240.png"
  replying_illustration_file: "https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/replying_320_240.png"
  error_illustration_file: "https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/error_320_240.png"
  timer_finished_illustration_file: "https://github.com/esphome/wake-word-voice-assistants/raw/main/casita/timer_finished_320_240.png"

  # Background colors (RGB hex, painted behind the matching illustration)
  loading_illustration_background_color: "000000"
  idle_illustration_background_color: "000000"
  listening_illustration_background_color: "FFFFFF"
  thinking_illustration_background_color: "FFFFFF"
  replying_illustration_background_color: "FFFFFF"
  error_illustration_background_color: "000000"

  # Voice assistant phase IDs. These are pasted verbatim into C++ lambdas
  # (comparisons and switch/case labels), so they must stay integer literals.
  voice_assist_idle_phase_id: "1"
  voice_assist_listening_phase_id: "2"
  voice_assist_thinking_phase_id: "3"
  voice_assist_replying_phase_id: "4"
  voice_assist_not_ready_phase_id: "10"
  voice_assist_error_phase_id: "11"
  voice_assist_muted_phase_id: "12"
  voice_assist_timer_finished_phase_id: "20"
  voice_assist_ota_phase_id: "30"

  # Font configuration
  allowed_characters: " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~°"
  font_glyphsets: "GF_Latin_Core"
  font_family: "Figtree"
ESPHome Core¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: esphome
esphome:
  name: "${device_name}"
  friendly_name: "${friendly_name}"
  comment: "${device_description}"
  min_version: "2025.5.0"
  name_add_mac_suffix: false
  on_boot:
    priority: 600
    then:
      # Draw once right away; draw_display shows the initializing page while
      # init_in_progress is still set.
      - script.execute: draw_display
      # If nothing has cleared init_in_progress within 30 s, clear it here so
      # the display can move on to the Wi-Fi / Home Assistant status pages.
      - delay: 30s
      - if:
          condition:
            lambda: |-
              return id(init_in_progress);
          then:
            - lambda: |-
                id(init_in_progress) = false;
            - script.execute: draw_display
ESP32 Platform¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: esp32
esp32:
  board: esp32s3box
  cpu_frequency: 240MHz
  flash_size: 16MB
  framework:
    type: esp-idf
    # Raw ESP-IDF Kconfig overrides: 240 MHz default clock and a 64 KB data
    # cache with 64-byte lines.
    sdkconfig_options:
      CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240: "y"
      CONFIG_ESP32S3_DATA_CACHE_64KB: "y"
      CONFIG_ESP32S3_DATA_CACHE_LINE_64B: "y"
# External PSRAM: octal interface clocked at 80 MHz.
psram:
  speed: 80MHz
  mode: octal
API Services¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: api
api:
  # Redraw whenever an API client connects or disconnects.
  on_client_connected:
    then:
      - script.execute: draw_display
  on_client_disconnected:
    then:
      - script.execute: draw_display

  # Services the Home Assistant side calls to report timer events.
  services:
    # Timer ran out: start the alarm.
    - service: timer_finished
      then:
        - logger.log: "Timer finished! Playing alarm..."
        - switch.turn_on: timer_ringing

    # Timer started: log the duration and redraw.
    - service: timer_started
      variables:
        duration: int
      then:
        - logger.log:
            format: "Timer started with duration: %d seconds"
            args:
              - duration
        - script.execute: draw_display

    # Timer cancelled: silence any alarm and redraw.
    - service: timer_cancelled
      then:
        - logger.log: "Timer cancelled"
        - switch.turn_off: timer_ringing
        - script.execute: draw_display

    # Explicit "stop the alarm" request.
    - service: stop_alarm
      then:
        - switch.turn_off: timer_ringing
OTA, Logger, WiFi, Time¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: ota
ota:
  - platform: esphome
    id: ota_esphome
    # Upload starting: stop wake-word detection, switch the phase to OTA and
    # show the progress page at 0 %.
    on_begin:
      - script.execute: stop_wake_word
      - lambda: |-
          id(voice_assistant_phase) = ${voice_assist_ota_phase_id};
          id(ota_progress) = 0;
      - display.page.show: ota_page
      - component.update: s3_box_lcd
    # x is the upload percentage; store it and redraw the progress bar.
    on_progress:
      - lambda: id(ota_progress) = (int)x;
      - component.update: s3_box_lcd
    on_end:
      - lambda: id(ota_progress) = 100;
      - component.update: s3_box_lcd
    on_error:
      # -1 makes ota_page render its "Update Failed!" text.
      - lambda: id(ota_progress) = -1;
      - display.page.show: error_page
      - component.update: s3_box_lcd
      - delay: 5s
      # The phase is still OTA here, so this shows ota_page with the failure
      # message.
      - script.execute: draw_display
      - delay: 5s
      # A failed upload leaves the firmware running. Without the steps below
      # the phase would stay at OTA and the wake word stopped in on_begin
      # would never be restarted.
      - script.execute: set_idle_or_mute_phase
      - script.execute: start_wake_word
      - script.execute: draw_display
# Debug-level logging over the USB Serial/JTAG port, with the chattier
# component tags turned down.
logger:
  hardware_uart: USB_SERIAL_JTAG
  level: DEBUG
  logs:
    component: ERROR
    sensor: WARN
    text_sensor: WARN
# Wi-Fi credentials come from secrets.yaml; any link change triggers a redraw
# so the display can switch to or from the no-Wi-Fi page.
wifi:
  ssid: !secret wifi_ssid
  password: !secret wifi_password
  on_connect:
    then:
      - script.execute: draw_display
  on_disconnect:
    then:
      - script.execute: draw_display
# SNTP clock; server list and timezone are read from secrets.yaml.
time:
  - id: sntp_time
    platform: sntp
    timezone: !secret timezone
    servers: !secret ntp_servers
Timer Sync Intervals¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: interval
interval:
  # Every 30 s: if the assistant is idle or muted while the HA timer is
  # active or paused, force a redraw so the timer graphics stay current.
  - interval: 30s
    then:
      - lambda: |-
          const int phase = id(voice_assistant_phase);
          if (phase != ${voice_assist_idle_phase_id} &&
              phase != ${voice_assist_muted_phase_id}) {
            return;
          }
          const std::string timer = id(timer_state).state;
          if (timer != "active" && timer != "paused") {
            return;
          }
          ESP_LOGD("timer_sync", "Timer is %s but display is idle - triggering redraw", timer.c_str());
          id(draw_display).execute();
Buttons¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: button
button:
  # Restart button exposed to Home Assistant.
  - id: restart_btn
    platform: restart
    name: "Restart"

  # Factory reset is internal only; the left top button's 10 s hold presses it.
  - id: factory_reset_btn
    platform: factory_reset
    internal: true
Home Assistant Timer Sensors¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: sensor
sensor:
  # Numeric mirror of the voice_assistant_phase global.
  - platform: template
    name: "Voice Assistant Phase"
    id: voice_assistant_phase_sensor
    lambda: |-
      return (float)id(voice_assistant_phase);
    update_interval: 500ms

  # Remaining seconds of the Home Assistant timer; every new value redraws.
  - platform: homeassistant
    id: timer_remaining
    name: "Timer remaining"
    entity_id: sensor.${timer_area}_timer_remaining_seconds
    unit_of_measurement: "s"
    device_class: "duration"
    on_value:
      then:
        - script.execute: draw_display

  # Total timer length, read from the same entity's duration_seconds attribute.
  - platform: homeassistant
    id: timer_duration
    name: "Timer duration"
    entity_id: sensor.${timer_area}_timer_remaining_seconds
    attribute: duration_seconds
    unit_of_measurement: "s"
    device_class: "duration"

  # Progress percentage attribute of the same entity (internal only).
  - platform: homeassistant
    id: timer_progress
    entity_id: sensor.${timer_area}_timer_remaining_seconds
    attribute: progress_percent
    internal: true

  # AHT20 temperature/humidity sensor on the second I2C bus.
  - platform: aht10
    i2c_id: bus_b
    variant: AHT20
    temperature:
      name: "Temperature"
      id: s3temp
    humidity:
      name: "Humidity"
    update_interval: 60s

  # Battery voltage: the ADC reading on GPIO10 scaled by 4.11.
  - platform: adc
    pin: GPIO10
    id: battery_voltage
    unit_of_measurement: "V"
    accuracy_decimals: 1
    device_class: "voltage"
    entity_category: "diagnostic"
    update_interval: 30s
    attenuation: auto
    filters:
      - multiply: 4.11

  # Battery percentage: 2.7 V..4.2 V mapped linearly onto 0..100 and clamped.
  - platform: copy
    id: battery_percent
    source_id: battery_voltage
    name: "Battery level"
    unit_of_measurement: "%"
    accuracy_decimals: 0
    device_class: "battery"
    entity_category: "diagnostic"
    filters:
      - lambda: return (x - 2.7) / (4.2 - 2.7) * 100;
      - clamp:
          min_value: 0
          max_value: 100

  # Wi-Fi signal strength in dB.
  - platform: wifi_signal
    name: "WiFi db"
    id: wifi_signal_db
    update_interval: 30s

  # Wi-Fi signal as a percentage: -100 dB -> 0 %, -50 dB and better -> 100 %.
  - platform: copy
    source_id: wifi_signal_db
    name: "WiFi Signal"
    id: wifi_percent
    filters:
      - lambda: return min(max(2 * (x + 100.0), 0.0), 100.0);
    unit_of_measurement: "%"
    entity_category: "diagnostic"
    # Clear the device class taken over from the dB source sensor; "%" is
    # not a valid unit for it (this matches the ESPHome docs example).
    device_class: ""
Touch and Binary Sensors¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: touchscreen
touchscreen:
  # GT911 controller at I2C address 0x5D on bus_a, interrupt on GPIO3.
  - id: gt911_touchscreen
    platform: gt911
    i2c_id: bus_a
    address: 0x5D
    interrupt_pin:
      number: GPIO3
      # GPIO3 is a strapping pin; silence the ESPHome warning about it.
      ignore_strapping_warning: true
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: binary_sensor
binary_sensor:
  # GPIO21 input published as an occupancy sensor.
  - platform: gpio
    name: "Presence detect"
    device_class: "occupancy"
    pin:
      number: GPIO21

  # GT911 button index 0. A press acts on the first matching case:
  #   1. timer ringing           -> silence it
  #   2. voice assistant running -> stop it
  #   3. announcement playing    -> stop the announcement
  #   4. media playing           -> pause it
  #   5. otherwise, if not muted -> play the wake sound, then start listening
  # Presses are ignored while initialization is still in progress.
  - platform: gt911
    id: touch_area
    index: 0
    on_press:
      then:
        - if:
            condition:
              lambda: |-
                return !id(init_in_progress);
            then:
              - if:
                  condition:
                    switch.is_on: timer_ringing
                  then:
                    - switch.turn_off: timer_ringing
                  else:
                    - if:
                        condition:
                          voice_assistant.is_running:
                        then:
                          - voice_assistant.stop:
                        else:
                          - if:
                              condition:
                                media_player.is_announcing:
                              then:
                                - media_player.stop:
                                    announcement: true
                              else:
                                - if:
                                    condition:
                                      media_player.is_playing:
                                    then:
                                      - media_player.pause:
                                    else:
                                      - if:
                                          condition:
                                            and:
                                              - lambda: |-
                                                  return !id(is_muted);
                                              - not:
                                                  voice_assistant.is_running:
                                          then:
                                            - media_player.speaker.play_on_device_media_file:
                                                media_file: wake_word_triggered_sound_file
                                                announcement: true
                                            # Let the wake sound finish first.
                                            - wait_until:
                                                condition:
                                                  not:
                                                    media_player.is_announcing:
                                            - voice_assistant.start:

  # Left top button (GPIO0, active low): a short click silences the timer,
  # a 10 s hold triggers a factory reset.
  - platform: gpio
    id: left_top_button
    internal: true
    pin:
      number: GPIO0
      mode: INPUT_PULLUP
      inverted: true
    on_multi_click:
      - timing:
          - ON for at least 50ms
          - OFF for at least 50ms
        then:
          - switch.turn_off: timer_ringing
      - timing:
          - ON for at least 10s
        then:
          - button.press: factory_reset_btn

  # Mute button (GPIO1, active low): pressed = muted, released = unmuted.
  # The state present at boot is applied as well.
  - platform: gpio
    id: mute_button
    name: "Mute Button"
    internal: true
    trigger_on_initial_state: true
    pin:
      number: GPIO1
      mode: INPUT_PULLUP
      inverted: true
    on_press:
      then:
        - script.execute: enable_mute
    on_release:
      then:
        - script.execute: disable_mute
I2C, I2S Audio, Microphone, Speaker¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: i2c
i2c:
  # bus_a (100 kHz): used by the touchscreen and both audio codecs.
  - id: bus_a
    sda: GPIO08
    scl: GPIO18
    frequency: 100kHz
    scan: true
    sda_pullup_enabled: true
    scl_pullup_enabled: true

  # bus_b (50 kHz): used by the AHT20 temperature/humidity sensor.
  - id: bus_b
    sda: GPIO41
    scl: GPIO40
    frequency: 50kHz
    scan: true
    sda_pullup_enabled: true
    scl_pullup_enabled: true
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: audio
# I2S bus shared by the microphone and the speaker.
i2s_audio:
  - id: i2s_audio_bus
    i2s_mclk_pin: GPIO2
    i2s_bclk_pin: GPIO17
    i2s_lrclk_pin: GPIO45

# ES7210 ADC (microphone side), configured over bus_a: 16 kHz / 16-bit.
audio_adc:
  - id: es7210_adc
    platform: es7210
    i2c_id: bus_a
    sample_rate: 16000
    bits_per_sample: 16bit

# ES8311 DAC (speaker side), configured over bus_a: 48 kHz / 16-bit.
audio_dac:
  - id: es8311_dac
    platform: es8311
    i2c_id: bus_a
    sample_rate: 48000
    bits_per_sample: 16bit

# I2S microphone input on GPIO16.
microphone:
  - id: box_mic
    platform: i2s_audio
    adc_type: external
    i2s_din_pin: GPIO16
    sample_rate: 16000
    bits_per_sample: 16bit

# I2S speaker output on GPIO15, routed through the ES8311.
speaker:
  - id: i2s_audio_speaker
    platform: i2s_audio
    i2s_audio_id: i2s_audio_bus
    dac_type: external
    audio_dac: es8311_dac
    i2s_dout_pin: GPIO15
    channel: left
    sample_rate: 48000
    bits_per_sample: 16bit
    buffer_duration: 100ms
# Speaker media player with an announcement pipeline only. It also hosts the
# on-device sound files.
media_player:
  - id: speaker_media_player
    platform: speaker
    name: None
    volume_min: 0.5
    volume_max: 0.8
    task_stack_in_psram: true
    announcement_pipeline:
      speaker: i2s_audio_speaker
      format: FLAC
      sample_rate: 48000
      num_channels: 1
    files:
      - id: timer_finished_sound
        file: "https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/timer_finished.flac"
      - id: wake_word_triggered_sound_file
        file: "https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/wake_word_triggered.flac"

    on_announcement:
      # Mark the announcement as in progress and let the lifecycle script
      # clear the flag once playback ends or times out.
      - lambda: |-
          id(announcement_in_progress) = true;
      - script.execute: track_announcement_lifecycle
      # Stop wake-word detection while the microphone is capturing.
      - if:
          condition:
            microphone.is_capturing:
          then:
            - script.execute: stop_wake_word
      # Outside a voice session and with no alarm ringing, switch to the
      # muted phase for the duration of the announcement.
      - if:
          condition:
            and:
              - not:
                  voice_assistant.is_running:
              - switch.is_off: timer_ringing
          then:
            - lambda: |-
                id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
            - script.execute: draw_display

    on_idle:
      - delay: 100ms
      # Resume wake-word detection only when nothing else is active.
      - if:
          condition:
            and:
              - not:
                  voice_assistant.is_running:
              - switch.is_off: timer_ringing
              - not:
                  media_player.is_announcing:
              - lambda: |-
                  return !id(announcement_in_progress);
          then:
            - script.execute: start_wake_word
            - script.execute: set_idle_or_mute_phase
            - script.execute: draw_display
Wake Word and Voice Assistant¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: wake_word
micro_wake_word:
  id: mww
  vad:
    model: github://esphome/micro-wake-word-models/models/v2/vad.json
  models:
    - id: okay_nabu
      model: okay_nabu
    # NOTE(review): nothing in this configuration enables or special-cases
    # the "stop" model - confirm whether it is meant to be used.
    - id: stop
      model: "https://github.com/kahrendt/microWakeWord/releases/download/stop/stop.json"
      internal: true
  # Hand the detected phrase to the voice assistant pipeline.
  on_wake_word_detected:
    then:
      - voice_assistant.start:
          wake_word: !lambda return wake_word;
# Voice assistant pipeline. Each stage sets the phase global and redraws;
# timers are handled by Home Assistant, so the timer hooks only log.
voice_assistant:
  id: va
  microphone: box_mic
  media_player: speaker_media_player
  micro_wake_word: mww
  noise_suppression_level: 2
  auto_gain: 31dBFS
  volume_multiplier: 2.0

  # Listening: reset both transcript lines to a placeholder.
  on_listening:
    - lambda: |-
        id(voice_assistant_phase) = ${voice_assist_listening_phase_id};
    - text_sensor.template.publish:
        id: text_request
        state: "..."
    - text_sensor.template.publish:
        id: text_response
        state: "..."
    - script.execute: draw_display

  # Speech ended: show the thinking page.
  on_stt_vad_end:
    - lambda: |-
        id(voice_assistant_phase) = ${voice_assist_thinking_phase_id};
    - script.execute: draw_display

  # Transcript available: publish it as the request text.
  on_stt_end:
    - text_sensor.template.publish:
        id: text_request
        state: !lambda return x;
    - script.execute: draw_display

  # Reply text available: publish it and show the replying page.
  on_tts_start:
    - text_sensor.template.publish:
        id: text_response
        state: !lambda return x;
    - lambda: |-
        id(voice_assistant_phase) = ${voice_assist_replying_phase_id};
    - script.execute: draw_display

  # NOTE(review): the flattened listing does not show where the else branch
  # ends; all cleanup steps are placed inside it, matching the "Skipping"
  # log message. Confirm against the downloadable YAML.
  on_end:
    - if:
        condition:
          lambda: |-
            return id(announcement_in_progress);
        then:
          - logger.log: "on_end: Skipping - announcement in progress"
        else:
          # Give a spoken reply up to 3 s to start ...
          - wait_until:
              condition:
                media_player.is_announcing:
              timeout: 3s
          # ... then wait until both the announcement and the speaker are done.
          - wait_until:
              condition:
                and:
                  - not:
                      media_player.is_announcing:
                  - not:
                      speaker.is_playing:
          - lambda: |-
              id(va).set_use_wake_word(false);
          - micro_wake_word.start:
          - script.execute: set_idle_or_mute_phase
          - script.execute: draw_display
          # Clear the transcript lines.
          - text_sensor.template.publish:
              id: text_request
              state: ""
          - text_sensor.template.publish:
              id: text_response
              state: ""

  # Show the error page for one second, except during initialization.
  on_error:
    - if:
        condition:
          lambda: |-
            return !id(init_in_progress);
        then:
          - lambda: |-
              id(voice_assistant_phase) = ${voice_assist_error_phase_id};
          - script.execute: draw_display
          - delay: 1s
          - script.execute: set_idle_or_mute_phase
          - script.execute: draw_display

  # Client connected: initialization is over, start listening for the wake word.
  on_client_connected:
    - lambda: |-
        id(init_in_progress) = false;
    - script.execute: start_wake_word
    - script.execute: set_idle_or_mute_phase
    - script.execute: draw_display

  # Client gone: stop the wake word and show the not-ready state.
  on_client_disconnected:
    - script.execute: stop_wake_word
    - lambda: |-
        id(voice_assistant_phase) = ${voice_assist_not_ready_phase_id};
    - script.execute: draw_display

  # Timer Event Stubs - HA handles actual timer logic
  on_timer_started:
    - logger.log:
        format: "Timer started (handled by HA): %s"
        args:
          - "timer.id.c_str()"
  on_timer_finished:
    - logger.log: "Timer finished event received (handled by HA automation)"
  on_timer_cancelled:
    - logger.log: "Timer cancelled (handled by HA)"
  on_timer_updated:
    - logger.log: "Timer updated (handled by HA)"
  on_timer_tick:
    - lambda: |-
        return;
Text Sensors¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: text_sensor
text_sensor:
  # Text form of the voice_assistant_phase global.
  - platform: template
    id: voice_assistant_state_sensor
    name: "Voice Assistant State"
    update_interval: 500ms
    lambda: |-
      switch (id(voice_assistant_phase)) {
        case ${voice_assist_idle_phase_id}:
          return {"idle"};
        case ${voice_assist_listening_phase_id}:
          return {"listening"};
        case ${voice_assist_thinking_phase_id}:
          return {"thinking"};
        case ${voice_assist_replying_phase_id}:
          return {"replying"};
        case ${voice_assist_error_phase_id}:
          return {"error"};
        case ${voice_assist_not_ready_phase_id}:
          return {"not_ready"};
        case ${voice_assist_muted_phase_id}:
          return {"muted"};
        case ${voice_assist_timer_finished_phase_id}:
          return {"timer_finished"};
        default:
          return {"unknown"};
      }

  # timer_state attribute of the HA timer sensor; every change redraws.
  - platform: homeassistant
    id: timer_state
    internal: true
    entity_id: sensor.${timer_area}_timer_remaining_seconds
    attribute: timer_state
    on_value:
      then:
        - script.execute: draw_display

  # Request transcript, cut to 31 characters plus "..." when longer than 32.
  - platform: template
    id: text_request
    on_value:
      - lambda: |-
          if (id(text_request).state.length() > 32) {
            const std::string full_text = id(text_request).state.c_str();
            const std::string head = esphome::str_truncate(full_text, 31);
            id(text_request).state = (head + "...").c_str();
          }

  # Response text, shortened the same way.
  - platform: template
    id: text_response
    on_value:
      - lambda: |-
          if (id(text_response).state.length() > 32) {
            const std::string full_text = id(text_response).state.c_str();
            const std::string head = esphome::str_truncate(full_text, 31);
            id(text_response).state = (head + "...").c_str();
          }
Switches¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: switch
# LEDC (PWM) output on GPIO47; the "Screen" light drives it.
output:
  - id: backlight_output
    platform: ledc
    pin: GPIO47
# Backlight exposed as a dimmable "Screen" light. It restores its previous
# state on boot and defaults to on.
light:
  - id: led
    platform: monochromatic
    name: "Screen"
    icon: "mdi:television"
    entity_category: config
    output: backlight_output
    default_transition_length: 250ms
    restore_mode: RESTORE_DEFAULT_ON
switch:
  # "Speaker Enable" GPIO switch on GPIO46. On by default, hidden in HA
  # unless enabled there.
  - platform: gpio
    name: "Speaker Enable"
    pin: GPIO46
    entity_category: config
    restore_mode: RESTORE_DEFAULT_ON
    disabled_by_default: true

  # Software mute. The state mirrors the is_muted global, and the actions
  # call toggle_mute only when the requested state differs from it.
  - platform: template
    id: mute
    name: "Mute"
    icon: "mdi:microphone-off"
    entity_category: config
    optimistic: false
    restore_mode: RESTORE_DEFAULT_OFF
    lambda: |-
      return id(is_muted);
    turn_on_action:
      - if:
          condition:
            lambda: |-
              return !id(is_muted);
          then:
            - script.execute: toggle_mute
    turn_off_action:
      - if:
          condition:
            lambda: |-
              return id(is_muted);
          then:
            - script.execute: toggle_mute

  # Alarm state. Turning it on loops the timer sound; turning it off stops
  # the loop and returns the display to idle or muted.
  - platform: template
    id: timer_ringing
    name: "Timer Ringing"
    icon: "mdi:bell-ring-outline"
    optimistic: true
    restore_mode: ALWAYS_OFF
    on_turn_on:
      - lambda: |-
          id(voice_assistant_phase) = ${voice_assist_timer_finished_phase_id};
      - script.execute: draw_display
      # Repeat the announcement, with 1000 ms between repetitions.
      - lambda: |-
          id(speaker_media_player)
            ->make_call()
            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_ONE)
            .set_announcement(true)
            .perform();
          id(speaker_media_player)->set_playlist_delay_ms(speaker::AudioPipelineType::ANNOUNCEMENT, 1000);
      - media_player.speaker.play_on_device_media_file:
          media_file: timer_finished_sound
          announcement: true
      # Give up after 15 minutes if nobody dismisses the alarm.
      - delay: 15min
      - switch.turn_off: timer_ringing
    on_turn_off:
      # Turn repeat off and remove the playlist delay before stopping.
      - lambda: |-
          id(speaker_media_player)
            ->make_call()
            .set_command(media_player::MediaPlayerCommand::MEDIA_PLAYER_COMMAND_REPEAT_OFF)
            .set_announcement(true)
            .perform();
          id(speaker_media_player)->set_playlist_delay_ms(speaker::AudioPipelineType::ANNOUNCEMENT, 0);
      - media_player.stop:
          announcement: true
      - script.execute: set_idle_or_mute_phase
      - script.execute: draw_display
Global Variables¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: globals
# Runtime state shared between automations. None of it is restored across
# reboots.
globals:
  # True from boot until the voice assistant client connects or the 30 s
  # boot timeout clears it.
  - id: init_in_progress
    type: bool
    initial_value: "true"
    restore_value: false

  # Current phase; holds one of the voice_assist_*_phase_id values.
  - id: voice_assistant_phase
    type: int
    initial_value: "${voice_assist_not_ready_phase_id}"
    restore_value: false

  # OTA progress in percent; -1 marks a failed update.
  - id: ota_progress
    type: int
    initial_value: "0"
    restore_value: false

  # Set when an announcement starts, cleared by track_announcement_lifecycle.
  - id: announcement_in_progress
    type: bool
    initial_value: "false"
    restore_value: false

  # Mute state shared by the mute button and the Mute switch.
  - id: is_muted
    type: bool
    initial_value: "false"
    restore_value: false
Scripts¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: script
script:
  # Shows the page matching the connection state and the assistant phase,
  # then forces a redraw (the display's update_interval is "never").
  - id: draw_display
    then:
      - if:
          condition:
            lambda: return !id(init_in_progress);
          then:
            - if:
                condition:
                  wifi.connected:
                then:
                  - if:
                      condition:
                        api.connected:
                      then:
                        - lambda: |-
                            switch (id(voice_assistant_phase)) {
                              case ${voice_assist_listening_phase_id}:
                                id(s3_box_lcd).show_page(listening_page);
                                break;
                              case ${voice_assist_thinking_phase_id}:
                                id(s3_box_lcd).show_page(thinking_page);
                                break;
                              case ${voice_assist_replying_phase_id}:
                                id(s3_box_lcd).show_page(replying_page);
                                break;
                              case ${voice_assist_error_phase_id}:
                                id(s3_box_lcd).show_page(error_page);
                                break;
                              case ${voice_assist_muted_phase_id}:
                                id(s3_box_lcd).show_page(muted_page);
                                break;
                              case ${voice_assist_not_ready_phase_id}:
                                id(s3_box_lcd).show_page(no_ha_page);
                                break;
                              case ${voice_assist_timer_finished_phase_id}:
                                id(s3_box_lcd).show_page(timer_finished_page);
                                break;
                              case ${voice_assist_ota_phase_id}:
                                id(s3_box_lcd).show_page(ota_page);
                                break;
                              default:
                                id(s3_box_lcd).show_page(idle_page);
                                break;
                            }
                            // Every branch redraws, so do it once here.
                            id(s3_box_lcd).update();
                      else:
                        - display.page.show: no_ha_page
                        - component.update: s3_box_lcd
                else:
                  - display.page.show: no_wifi_page
                  - component.update: s3_box_lcd
          else:
            - display.page.show: initializing_page
            - component.update: s3_box_lcd

  # Draws the timer progress bar along the bottom edge (y = 225..239).
  # Called from the display page lambdas. Nothing is drawn unless the timer
  # is active or paused with a positive duration and remaining time.
  - id: draw_timer_timeline
    then:
      - lambda: |-
          const std::string state = id(timer_state).state;
          const bool active = (state == "active");
          if (!active && state != "paused") {
            return;
          }
          // Home Assistant sensors are NaN until a value arrives. Casting
          // NaN to int is undefined, so stop before converting.
          const float remaining_raw = id(timer_remaining).state;
          const float duration_raw = id(timer_duration).state;
          if (std::isnan(remaining_raw) || std::isnan(duration_raw)) {
            return;
          }
          const int remaining = (int) remaining_raw;
          const int duration = (int) duration_raw;
          if (duration <= 0 || remaining <= 0) {
            return;
          }
          // 64-bit product avoids overflow; the bar is capped at screen width.
          const int active_pixels =
              (int) std::min<long long>(320, (320LL * remaining) / duration);
          if (active_pixels > 0) {
            id(s3_box_lcd).filled_rectangle(0, 225, 320, 15, Color::WHITE);
            if (active) {
              id(s3_box_lcd).filled_rectangle(0, 226, active_pixels, 13, id(active_timer_color));
            } else {
              id(s3_box_lcd).filled_rectangle(0, 226, active_pixels, 13, id(paused_timer_color));
            }
          }

  # Draws the boxed countdown text while the timer is active or paused.
  # Shows HH:MM when at least one hour remains, otherwise MM:SS.
  - id: draw_active_timer_widget
    then:
      - lambda: |-
          const std::string state = id(timer_state).state;
          if (state != "active" && state != "paused") {
            return;
          }
          // Treat NaN (no value yet) and negative readings as zero so the
          // widget shows 00:00 instead of garbage.
          const float remaining_raw = id(timer_remaining).state;
          const int remaining =
              (std::isnan(remaining_raw) || remaining_raw < 0.0f) ? 0 : (int) remaining_raw;
          id(s3_box_lcd).filled_rectangle(80, 40, 160, 50, Color::WHITE);
          id(s3_box_lcd).rectangle(80, 40, 160, 50, Color::BLACK);
          const int hours_left = remaining / 3600;
          const int minutes_left = (remaining % 3600) / 60;
          const int seconds_left = remaining % 60;
          if (hours_left > 0) {
            id(s3_box_lcd).printf(120, 47, id(font_timer), Color::BLACK, "%02d:%02d", hours_left, minutes_left);
          } else {
            id(s3_box_lcd).printf(120, 47, id(font_timer), Color::BLACK, "%02d:%02d", minutes_left, seconds_left);
          }

  # Mute: stop the wake word, mute the microphone, publish the state and
  # show the muted page.
  - id: enable_mute
    mode: single
    then:
      - script.execute: stop_wake_word
      - delay: 100ms
      - microphone.mute:
      - lambda: id(is_muted) = true;
      - switch.template.publish:
          id: mute
          state: ON
      - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
      - script.execute: draw_display

  # Unmute: publish the state, return to idle, unmute the microphone and
  # restart the wake word.
  - id: disable_mute
    mode: single
    then:
      - lambda: id(is_muted) = false;
      - switch.template.publish:
          id: mute
          state: OFF
      - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
      - script.execute: draw_display
      - microphone.unmute:
      - delay: 100ms
      - script.execute: start_wake_word

  # Flips the mute state. Used by the Mute switch; runs the same steps as
  # disable_mute / enable_mute depending on the current state.
  - id: toggle_mute
    mode: restart
    then:
      - if:
          condition:
            lambda: return id(is_muted);
          then:
            - lambda: id(is_muted) = false;
            - switch.template.publish:
                id: mute
                state: OFF
            - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
            - script.execute: draw_display
            - microphone.unmute:
            - delay: 100ms
            - script.execute: start_wake_word
          else:
            - script.execute: stop_wake_word
            - delay: 100ms
            - microphone.mute:
            - lambda: id(is_muted) = true;
            - switch.template.publish:
                id: mute
                state: ON
            - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};
            - script.execute: draw_display

  # Starts on-device wake-word detection unless the device is muted or the
  # voice assistant is already running.
  - id: start_wake_word
    then:
      - if:
          condition:
            lambda: return !id(is_muted);
          then:
            - if:
                condition:
                  not:
                    - voice_assistant.is_running:
                then:
                  - lambda: id(va).set_use_wake_word(false);
                  - micro_wake_word.start:

  - id: stop_wake_word
    then:
      - micro_wake_word.stop:

  # Sets the phase to idle, or to muted when is_muted is set. Does not redraw.
  - id: set_idle_or_mute_phase
    then:
      - if:
          condition:
            lambda: return !id(is_muted);
          then:
            - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id};
          else:
            - lambda: id(voice_assistant_phase) = ${voice_assist_muted_phase_id};

  # Clears announcement_in_progress once the announcement has played, or
  # after 30 s if audio never started.
  # NOTE(review): the flattened listing does not show where the else branch
  # ends; the final log and flag reset are placed inside it because the
  # timeout branch already resets the flag itself.
  - id: track_announcement_lifecycle
    mode: restart
    then:
      - logger.log: "Announcement lifecycle: waiting for audio to start..."
      - wait_until:
          condition:
            - media_player.is_announcing:
          timeout: 30s
      - if:
          condition:
            - not:
                media_player.is_announcing:
          then:
            - logger.log: "Announcement lifecycle: timed out waiting for audio"
            - lambda: id(announcement_in_progress) = false;
          else:
            - logger.log: "Announcement lifecycle: audio playing, waiting for completion..."
            - wait_until:
                condition:
                  - not:
                      media_player.is_announcing:
                timeout: 10min
            - logger.log: "Announcement lifecycle: complete"
            - lambda: id(announcement_in_progress) = false;
Images, Fonts, Colors¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: image
# All illustrations are resized to 320x240 and stored as RGB with an alpha
# channel.
image:
  # Phase illustrations (sources are set in the substitutions).
  - id: casita_error
    file: "${error_illustration_file}"
    type: RGB
    transparency: alpha_channel
    resize: 320x240
  - id: casita_idle
    file: "${idle_illustration_file}"
    type: RGB
    transparency: alpha_channel
    resize: 320x240
  - id: casita_listening
    file: "${listening_illustration_file}"
    type: RGB
    transparency: alpha_channel
    resize: 320x240
  - id: casita_thinking
    file: "${thinking_illustration_file}"
    type: RGB
    transparency: alpha_channel
    resize: 320x240
  - id: casita_replying
    file: "${replying_illustration_file}"
    type: RGB
    transparency: alpha_channel
    resize: 320x240
  - id: casita_timer_finished
    file: "${timer_finished_illustration_file}"
    type: RGB
    transparency: alpha_channel
    resize: 320x240
  - id: casita_initializing
    file: "${loading_illustration_file}"
    type: RGB
    transparency: alpha_channel
    resize: 320x240

  # Connectivity error illustrations.
  - id: error_no_wifi
    file: "https://github.com/esphome/wake-word-voice-assistants/raw/main/error_box_illustrations/error-no-wifi.png"
    type: RGB
    transparency: alpha_channel
    resize: 320x240
  - id: error_no_ha
    file: "https://github.com/esphome/wake-word-voice-assistants/raw/main/error_box_illustrations/error-no-ha.png"
    type: RGB
    transparency: alpha_channel
    resize: 320x240
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: font
# Google Fonts faces of ${font_family}, limited to the configured glyph set.
font:
  # Request text: light italic, 15 px.
  - id: font_request
    size: 15
    file:
      type: gfonts
      family: "${font_family}"
      weight: 300
      italic: true
    glyphsets:
      - "${font_glyphsets}"

  # Response text: light, 15 px.
  - id: font_response
    size: 15
    file:
      type: gfonts
      family: "${font_family}"
      weight: 300
    glyphsets:
      - "${font_glyphsets}"

  # Timer countdown: light, 30 px.
  - id: font_timer
    size: 30
    file:
      type: gfonts
      family: "${font_family}"
      weight: 300
    glyphsets:
      - "${font_glyphsets}"

  # OTA status line: bold, 24 px.
  - id: font_ota
    size: 24
    file:
      type: gfonts
      family: "${font_family}"
      weight: 700
    glyphsets:
      - "${font_glyphsets}"
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: color
color:
  # Page backgrounds (values come from the substitutions).
  - id: idle_color
    hex: "${idle_illustration_background_color}"
  - id: listening_color
    hex: "${listening_illustration_background_color}"
  - id: thinking_color
    hex: "${thinking_illustration_background_color}"
  - id: replying_color
    hex: "${replying_illustration_background_color}"
  - id: loading_color
    hex: "${loading_illustration_background_color}"
  - id: error_color
    hex: "${error_illustration_background_color}"

  # Timer bar and OTA progress bar fill colors.
  - id: active_timer_color
    hex: "26ed3a"
  - id: paused_timer_color
    hex: "3b89e3"
  - id: ota_progress_color
    hex: "ff6600"
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: spi
# SPI bus for the LCD (clock and MOSI only).
spi:
  - id: spi_bus
    mosi_pin: GPIO6
    clk_pin: GPIO7
Display¶
# file: esphome/examples/esp32-s3-box-3-voice-assistant.yaml
# section: display
display:
  - id: s3_box_lcd
    platform: ili9xxx
    model: S3BOX
    data_rate: 40MHz
    invert_colors: false
    cs_pin: GPIO5
    dc_pin: GPIO4
    reset_pin:
      number: GPIO48
      inverted: true
    # Never refresh on a schedule; the automations redraw explicitly.
    update_interval: never
    pages:
      # Idle: illustration plus the timer bar.
      - id: idle_page
        lambda: |-
          it.fill(id(idle_color));
          it.image(it.get_width() / 2, it.get_height() / 2, id(casita_idle), ImageAlign::CENTER);
          id(draw_timer_timeline).execute();

      # Listening: illustration plus the timer bar.
      - id: listening_page
        lambda: |-
          it.fill(id(listening_color));
          it.image(it.get_width() / 2, it.get_height() / 2, id(casita_listening), ImageAlign::CENTER);
          id(draw_timer_timeline).execute();

      # Thinking: adds the request text in a box at the top.
      - id: thinking_page
        lambda: |-
          it.fill(id(thinking_color));
          it.image(it.get_width() / 2, it.get_height() / 2, id(casita_thinking), ImageAlign::CENTER);
          // Request box
          it.filled_rectangle(20, 20, 280, 30, Color::WHITE);
          it.rectangle(20, 20, 280, 30, Color::BLACK);
          it.printf(30, 25, id(font_request), Color::BLACK, "%s", id(text_request).state.c_str());
          id(draw_timer_timeline).execute();

      # Replying: request box at the top, response box at the bottom.
      - id: replying_page
        lambda: |-
          it.fill(id(replying_color));
          it.image(it.get_width() / 2, it.get_height() / 2, id(casita_replying), ImageAlign::CENTER);
          // Request box
          it.filled_rectangle(20, 20, 280, 30, Color::WHITE);
          it.rectangle(20, 20, 280, 30, Color::BLACK);
          // Response box
          it.filled_rectangle(20, 190, 280, 30, Color::WHITE);
          it.rectangle(20, 190, 280, 30, Color::BLACK);
          it.printf(30, 25, id(font_request), Color::BLACK, "%s", id(text_request).state.c_str());
          it.printf(30, 195, id(font_response), Color::BLACK, "%s", id(text_response).state.c_str());
          id(draw_timer_timeline).execute();

      # Timer finished (alarm ringing).
      - id: timer_finished_page
        lambda: |-
          it.fill(id(idle_color));
          it.image(it.get_width() / 2, it.get_height() / 2, id(casita_timer_finished), ImageAlign::CENTER);

      # Generic error.
      - id: error_page
        lambda: |-
          it.fill(id(error_color));
          it.image(it.get_width() / 2, it.get_height() / 2, id(casita_error), ImageAlign::CENTER);

      # Home Assistant not reachable.
      - id: no_ha_page
        lambda: |-
          it.image(it.get_width() / 2, it.get_height() / 2, id(error_no_ha), ImageAlign::CENTER);

      # Wi-Fi not connected.
      - id: no_wifi_page
        lambda: |-
          it.image(it.get_width() / 2, it.get_height() / 2, id(error_no_wifi), ImageAlign::CENTER);

      # Shown while initialization is in progress.
      - id: initializing_page
        lambda: |-
          it.fill(id(loading_color));
          it.image(it.get_width() / 2, it.get_height() / 2, id(casita_initializing), ImageAlign::CENTER);

      # Muted: black screen with the timer bar and the countdown widget.
      - id: muted_page
        lambda: |-
          it.fill(Color::BLACK);
          id(draw_timer_timeline).execute();
          id(draw_active_timer_widget).execute();

      # OTA: progress bar (276 px at 100 %) and a status line. A negative
      # ota_progress marks a failed update.
      - id: ota_page
        lambda: |-
          const int progress = id(ota_progress);
          it.fill(id(error_color));
          it.image(it.get_width() / 2, it.get_height() / 2, id(casita_error), ImageAlign::CENTER);
          it.filled_rectangle(20, 200, 280, 30, Color::WHITE);
          it.rectangle(20, 200, 280, 30, Color::BLACK);
          const int bar_width = (progress * 276) / 100;
          if (bar_width > 0) {
            it.filled_rectangle(22, 202, bar_width, 26, id(ota_progress_color));
          }
          if (progress < 0) {
            it.printf(160, 185, id(font_ota), Color::WHITE, TextAlign::BOTTOM_CENTER, "Update Failed!");
          } else {
            it.printf(160, 185, id(font_ota), Color::WHITE, TextAlign::BOTTOM_CENTER, "Upgrading: %d%%", progress);
          }
Testing¶
After flashing:
- Say "Okay Nabu" followed by "Set a timer for 1 minute"
- Verify the timer progress bar appears at the bottom of the display
- Wait for the timer to complete, then verify the alarm plays and can be dismissed by touching the screen
- Optionally, set another timer and say "Cancel the timer" to verify that the progress bar disappears