working android auto

This commit is contained in:
Anonymous
2026-03-16 22:42:54 +01:00
parent c9239a1f6b
commit 31598729b8
12 changed files with 1353 additions and 399 deletions
+14
View File
@@ -7,6 +7,17 @@ resolver = "2"
rust-version = "1.82"
license = "LGPL-3.0-or-later"
[features]
default = []
# Build in nav-only mode: no H.264 video decode, only turn-by-turn text.
# Saves ~300KB+ PSRAM and significant CPU. No esp_h264 component needed.
nav-only = ["dep:miniz_oxide"]
[profile.release]
opt-level = 3
lto = "fat"
codegen-units = 1
[dependencies]
# ESP-IDF bindings (versions matched to ESP-IDF v5.5.1 in idf-rust:all_latest container)
esp-idf-svc = { version = "0.52", features = ["alloc"] }
@@ -27,6 +38,9 @@ log = "0.4"
# Bitfield for frame headers (reused from upstream)
bitfield = "0.19"
# PNG inflate for nav-only turn arrow images (pure Rust, no C deps)
miniz_oxide = { version = "0.7", optional = true }
[build-dependencies]
embuild = "0.33"
protobuf-codegen = "3.7"
+15 -1
View File
@@ -8,6 +8,8 @@ BINARY_NAME="esp32-android-auto-nav"
# Parse arguments
BUILD_ONLY=false
NAV_ONLY=false
CARGO_FEATURES=""
for arg in "$@"; do
case $arg in
@@ -15,16 +17,28 @@ for arg in "$@"; do
BUILD_ONLY=true
shift
;;
-n|--nav-only)
NAV_ONLY=true
shift
;;
-h|--help)
echo "Usage: ./build.sh [OPTIONS]"
echo "Options:"
echo " -b, --build-only, --no-flash Build only, skip flashing prompt"
echo " -n, --nav-only Nav-only mode: text turn-by-turn, no video"
echo " -h, --help Show this help message"
exit 0
;;
esac
done
if [ "$NAV_ONLY" = true ]; then
CARGO_FEATURES="--features nav-only"
echo "📍 Mode: NAV-ONLY (turn-by-turn text, no H.264 video)"
else
echo "🎬 Mode: FULL VIDEO (H.264 decode + display)"
fi
echo "🔨 Building $BINARY_NAME (release)..."
echo ""
@@ -36,7 +50,7 @@ sudo podman run --rm \
-v $(pwd):/project \
-w /project \
docker.io/espressif/idf-rust:all_latest \
bash -c "export RUSTUP_HOME=/home/esp/.rustup && export CARGO_HOME=/home/esp/.cargo && source /home/esp/export-esp.sh && cargo build --release"
bash -c "export RUSTUP_HOME=/home/esp/.rustup && export CARGO_HOME=/home/esp/.cargo && source /home/esp/export-esp.sh && cargo build --release $CARGO_FEATURES"
echo ""
echo "✅ Build complete!"
+1
View File
@@ -0,0 +1 @@
1
+1
View File
@@ -0,0 +1 @@
error: custom toolchain 'esp' specified in override file '/project/rust-toolchain.toml' is not installed
+1
View File
@@ -0,0 +1 @@
1
+59 -17
View File
@@ -1,36 +1,61 @@
# ESP-IDF sdkconfig defaults for Android Auto Nav Head Unit
# Target: ESP32-S3, WT32-SC01 Plus
# CPU at max frequency (240MHz) — critical for SW H.264 decode + I420→RGB565
CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y
CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ=240
# Compiler optimization for performance (-O2 for ESP-IDF C code)
CONFIG_COMPILER_OPTIMIZATION_PERF=y
# Main task stack — needs to be large for TLS + protobuf
CONFIG_ESP_MAIN_TASK_STACK_SIZE=32768
# PSRAM (8MB on WT32-SC01 Plus — quad SPI, NOT octal)
# PSRAM (2MB on WT32-SC01 Plus)
CONFIG_SPIRAM=y
CONFIG_SPIRAM_MODE_QUAD=y
CONFIG_SPIRAM_SPEED_80M=y
# Use MALLOC mode: regular malloc() falls back to PSRAM when internal SRAM is full.
# This is critical because Rust's Vec/String use malloc, not heap_caps_malloc.
CONFIG_SPIRAM_USE_MALLOC=y
# Allocations <= 4KB stay in fast internal SRAM; larger ones go to PSRAM automatically
# Allocations <= 4KB go to internal DRAM, larger ones to PSRAM.
# The new strip-by-strip pipeline eliminates the 300KB PSRAM VideoFrame —
# only the esp_h264 decoder's internal I420 buffers (~576KB) live in PSRAM.
CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=4096
# Reserve some internal memory for DMA/stack (default is fine but be explicit)
# Reserve internal memory for DMA buffers (76.8KB) + ESP-IDF critical allocations
CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=32768
# Allow thread stacks in PSRAM (decode+display thread)
CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY=y
# Skip PSRAM memtest on boot (saves ~500ms startup)
CONFIG_SPIRAM_MEMTEST=n
# Flash (16MB on WT32-SC01 Plus)
# Data Cache — maximize for PSRAM performance (H.264 decode reads PSRAM constantly)
CONFIG_ESP32S3_DATA_CACHE_64KB=y
CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
# Instruction cache — 32KB reduces flash cache misses in hot decode/render loops
CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
# Flash (16MB on WT32-SC01 Plus) — 80MHz QIO for faster code fetch
CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
CONFIG_ESPTOOLPY_FLASHFREQ_80M=y
# LCD I80 bus — use PLL clock source for stable 40MHz pixel clock
CONFIG_LCD_PERIPH_CLK_SRC_PLL160M=y
# Bluetooth — BLE only (ESP32-S3 does NOT support Bluetooth Classic)
# Android Auto wireless pairing via BT SPP is not available on this SoC.
CONFIG_BT_ENABLED=y
CONFIG_BT_BLE_ENABLED=y
CONFIG_BT_NIMBLE_ENABLED=y
# WiFi — required for Android Auto data transport
# WiFi — minimize internal SRAM usage (leave room for DMA buffers)
CONFIG_ESP_WIFI_ENABLED=y
CONFIG_ESP_WIFI_STATIC_RX_BUFFER_NUM=10
CONFIG_ESP_WIFI_DYNAMIC_RX_BUFFER_NUM=32
CONFIG_ESP_WIFI_DYNAMIC_TX_BUFFER_NUM=32
CONFIG_ESP_WIFI_DYNAMIC_TX_BUFFER=y
CONFIG_ESP_WIFI_TX_BUFFER_TYPE=1
CONFIG_ESP_WIFI_DYNAMIC_TX_BUFFER_NUM=8
CONFIG_ESP_WIFI_STATIC_RX_BUFFER_NUM=6
CONFIG_ESP_WIFI_DYNAMIC_RX_BUFFER_NUM=16
CONFIG_ESP_WIFI_RX_BA_WIN=4
CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP=y
# H.264 software decoder (esp_h264 component)
# Dual-task decoder for better FPS on ESP32-S3
@@ -41,20 +66,37 @@ CONFIG_ESP_H264_DUAL_TASK=1
CONFIG_MBEDTLS_TLS_CLIENT=y
CONFIG_MBEDTLS_TLS_SERVER=y
CONFIG_MBEDTLS_SSL_ALPN=y
CONFIG_MBEDTLS_CERTIFICATE_BUNDLE=y
CONFIG_MBEDTLS_CERTIFICATE_BUNDLE_DEFAULT_FULL=y
# Disable cert bundle — we only use our own AA cert, and server verify is NONE.
# The full bundle wastes ~60KB of heap when parsed.
CONFIG_MBEDTLS_CERTIFICATE_BUNDLE=n
CONFIG_MBEDTLS_HARDWARE_AES=y
CONFIG_MBEDTLS_HARDWARE_SHA=y
CONFIG_MBEDTLS_KEY_EXCHANGE_RSA=y
# Use default allocator (not internal-only) — RSA MPI needs >32KB of temp buffers
# and 64KB dcache + 32KB icache + 77KB DMA staging exhaust internal DRAM.
# PSRAM is fine for one-time handshake; AES-GCM encrypt/decrypt uses HW accel.
CONFIG_MBEDTLS_DEFAULT_MEM_ALLOC=y
# TCP/IP
# TCP/IP — larger window for video streaming throughput
# 32KB window + 32KB send buffer reduces TCP stalls when phone sends
# bursty H.264 data. LWIP buffers go to PSRAM (SPIRAM_TRY_ALLOCATE).
CONFIG_LWIP_MAX_SOCKETS=10
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=8192
CONFIG_LWIP_TCP_WND_DEFAULT=8192
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=32768
CONFIG_LWIP_TCP_WND_DEFAULT=32768
CONFIG_LWIP_TCP_RECVMBOX_SIZE=32
# Logging
# Logging — disable dynamic level checks (~10× faster log calls)
CONFIG_LOG_DEFAULT_LEVEL_INFO=y
CONFIG_LOG_DYNAMIC_LEVEL_CONTROL=n
CONFIG_LOG_TAG_LEVEL_IMPL_NONE=y
# FreeRTOS — 1ms ticks for responsive scheduling
CONFIG_FREERTOS_HZ=1000
# Task watchdog — 10s for heavy decode workload
CONFIG_ESP_TASK_WDT_TIMEOUT_S=10
# Disable interrupt WDT on Core 1 — long DMA waits during video decode are normal
CONFIG_ESP_INT_WDT_CHECK_CPU1=n
# mDNS — for Android Auto service discovery (_androidauto._tcp)
CONFIG_MDNS_MAX_SERVICES=4
+36 -3
View File
@@ -15,6 +15,8 @@ use crate::proto::Wifi;
// ---------------------------------------------------------------------------
/// Build a video channel descriptor (480p, 30fps).
/// Always advertise real config — the phone requires a valid video channel.
/// In nav-only mode, we accept setup but respond UNFOCUSED to prevent streaming.
pub fn build_video_channel_descriptor(channel_id: ChannelId) -> Wifi::ChannelDescriptor {
let mut chan = Wifi::ChannelDescriptor::new();
chan.set_channel_id(channel_id as u32);
@@ -67,6 +69,37 @@ pub fn video_focus_frame(channel_id: ChannelId, focused: bool) -> Frame {
Frame::new_encrypted(channel_id, data)
}
/// Build an unsolicited VideoFocusIndication (unrequested=true).
/// Sent proactively after video setup to kick-start the phone's stream.
pub fn video_focus_frame_unrequested(channel_id: ChannelId, focused: bool) -> Frame {
    // Pick the focus mode up front so the builder chain below stays linear.
    let mode = if focused {
        Wifi::video_focus_mode::Enum::FOCUSED
    } else {
        Wifi::video_focus_mode::Enum::UNFOCUSED
    };
    let mut indication = Wifi::VideoFocusIndication::new();
    indication.set_focus_mode(mode);
    indication.set_unrequested(true);
    // Wire format: big-endian u16 message id, then the protobuf body.
    let payload = indication.write_to_bytes().unwrap();
    let msg_id = (Wifi::avchannel_message::Enum::VIDEO_FOCUS_INDICATION as u16).to_be_bytes();
    let mut data = Vec::with_capacity(2 + payload.len());
    data.extend_from_slice(&msg_id);
    data.extend_from_slice(&payload);
    Frame::new_encrypted(channel_id, data)
}
/// Build a video setup response frame that rejects the stream.
pub fn video_setup_fail_frame(channel_id: ChannelId) -> Frame {
    let mut response = Wifi::AVChannelSetupResponse::new();
    response.set_media_status(Wifi::avchannel_setup_status::Enum::FAIL);
    response.set_max_unacked(0);
    // Wire format: big-endian u16 message id, then the protobuf body.
    let payload = response.write_to_bytes().unwrap();
    let msg_id = (Wifi::avchannel_message::Enum::SETUP_RESPONSE as u16).to_be_bytes();
    let mut data = Vec::with_capacity(2 + payload.len());
    data.extend_from_slice(&msg_id);
    data.extend_from_slice(&payload);
    Frame::new_encrypted(channel_id, data)
}
/// Build an audio setup response frame.
pub fn audio_setup_response_frame(channel_id: ChannelId) -> Frame {
let mut resp = Wifi::AVChannelSetupResponse::new();
@@ -225,7 +258,7 @@ pub fn sensor_start_response_frame(channel_id: ChannelId) -> Frame {
pub fn sensor_driving_status_frame(channel_id: ChannelId) -> Frame {
let mut evt = Wifi::SensorEventIndication::new();
let mut ds = Wifi::DrivingStatus::new();
ds.set_status(0); // DrivingStatusEnum::UNRESTRICTED = 0
ds.set_status(0); // DrivingStatusEnum::UNRESTRICTED
evt.driving_status.push(ds);
let mut data = Vec::new();
@@ -273,8 +306,8 @@ pub fn build_input_channel_descriptor(channel_id: ChannelId) -> Wifi::ChannelDes
let mut input = Wifi::InputChannel::new();
let mut ts_config = Wifi::TouchConfig::new();
ts_config.set_width(480);
ts_config.set_height(320);
ts_config.set_width(800);
ts_config.set_height(480);
input.touch_screen_config = ::protobuf::MessageField::some(ts_config);
chan.input_channel = ::protobuf::MessageField::some(input);
+240 -119
View File
@@ -143,73 +143,29 @@ impl Default for DecoderConfig {
/// H.264 decoder backed by the espressif/esp_h264 C component (v1.3.0).
///
/// Lifecycle: `new()` creates and opens the decoder once.
/// Each `decode()` call feeds one NAL unit and returns an RGB565 frame
/// when a complete image frame is ready (IDR/P). SPS/PPS/SEI NALs return
/// `Ok(None)` — that is normal and expected at stream start.
/// Each `decode_into()` call feeds one NAL unit and writes the decoded
/// RGB565 frame directly into a caller-provided buffer (e.g. a VideoFrame).
/// SPS/PPS/SEI NALs return `Ok(false)` — that is normal at stream start.
///
/// The decoder's output I420 buffer is managed internally by the C library.
/// Only the RGB565 output framebuffer is allocated here (in PSRAM).
/// No intermediate RGB565 buffer is allocated — the caller owns the output.
pub struct H264Decoder {
config: DecoderConfig,
/// esp_h264 decoder handle (opaque C pointer)
handle: ffi::EspH264DecHandle,
/// Output RGB565 framebuffer (480×320 × 2 bytes) — in PSRAM
rgb565_buf: PsramBuf,
/// Total frames that produced image output
frames_decoded: u64,
/// Bytes of H.264 data received (for the periodic log in session.rs)
total_bytes_fed: usize,
}
/// A buffer allocated in PSRAM via heap_caps_malloc.
struct PsramBuf {
ptr: *mut u8,
capacity: usize,
}
impl PsramBuf {
fn new(size: usize) -> Result<Self> {
let ptr = unsafe {
esp_idf_sys::heap_caps_malloc(size, esp_idf_sys::MALLOC_CAP_SPIRAM)
};
if ptr.is_null() {
let free = unsafe { esp_idf_sys::esp_get_free_heap_size() };
bail!(
"PSRAM alloc failed: {} KB requested, {} KB free heap",
size / 1024,
free / 1024,
);
}
unsafe { std::ptr::write_bytes(ptr as *mut u8, 0, size); }
Ok(Self { ptr: ptr as *mut u8, capacity: size })
}
fn as_u16_slice(&self, pixel_count: usize) -> &[u16] {
unsafe { std::slice::from_raw_parts(self.ptr as *const u16, pixel_count) }
}
fn as_u16_mut_slice(&mut self, pixel_count: usize) -> &mut [u16] {
unsafe { std::slice::from_raw_parts_mut(self.ptr as *mut u16, pixel_count) }
}
}
impl Drop for PsramBuf {
fn drop(&mut self) {
unsafe { esp_idf_sys::heap_caps_free(self.ptr as *mut std::ffi::c_void); }
}
}
impl H264Decoder {
/// Create and open a new software H.264 decoder.
///
/// Calls `esp_h264_dec_sw_new` + `esp_h264_dec_open`. Allocates only
/// the RGB565 output framebuffer in PSRAM; all I420 buffers are managed
/// internally by the C component.
/// Calls `esp_h264_dec_sw_new` + `esp_h264_dec_open`. No RGB565
/// buffer is allocated — the caller provides the output buffer
/// via `decode_into()`.
pub fn new(config: DecoderConfig) -> Result<Self> {
let rgb565_size = (config.target_width * config.target_height * 2) as usize;
let rgb565_buf = PsramBuf::new(rgb565_size)
.context("allocating RGB565 output buffer in PSRAM")?;
let cfg = ffi::EspH264DecCfg {
pic_type: ffi::ESP_H264_RAW_FMT_I420,
};
@@ -230,35 +186,115 @@ impl H264Decoder {
esp_idf_sys::heap_caps_get_free_size(esp_idf_sys::MALLOC_CAP_SPIRAM)
};
log::info!(
"H.264 decoder ready: {}×{} → {}×{}, RGB565 {}KB in PSRAM, {} KB PSRAM free",
"H.264 decoder ready: {}×{} → {}×{}, {} KB PSRAM free",
config.source_width, config.source_height,
config.target_width, config.target_height,
rgb565_size / 1024,
free_psram / 1024,
);
Ok(Self {
config,
handle,
rgb565_buf,
frames_decoded: 0,
total_bytes_fed: 0,
})
}
/// Feed one H.264 NAL unit from Android Auto and decode it.
/// Feed one H.264 NAL unit and decode directly into the caller's buffer.
///
/// Returns `Ok(Some(rgb565))` when an image frame was decoded (IDR or P).
/// Returns `Ok(None)` for SPS / PPS / SEI NALs — this is expected and
/// Returns `Ok(true)` when an image frame was decoded (IDR or P).
/// Returns `Ok(false)` for SPS / PPS / SEI NALs — this is expected and
/// normal at the start of a stream.
///
/// The returned slice is valid until the next call to `decode()`.
pub fn decode(&mut self, h264_data: &[u8]) -> Result<Option<&[u16]>> {
/// The `rgb565_out` buffer must be at least `target_width * target_height`
/// pixels. This avoids allocating an intermediate 300KB PSRAM buffer.
pub fn decode_into(&mut self, h264_data: &[u8], rgb565_out: &mut [u16]) -> Result<bool> {
// Track cumulative input volume for the periodic stats log below.
self.total_bytes_fed += h264_data.len();
let mut in_frame = ffi::EspH264DecInFrame {
raw_data: ffi::EspH264Pkt {
// NOTE(review): const is cast away to satisfy the C struct; this
// assumes the decoder only reads the input buffer during the
// synchronous call — confirm against the esp_h264 component docs.
buffer: h264_data.as_ptr() as *mut u8,
len: h264_data.len() as u32,
},
consume: 0,
dts: 0,
pts: 0,
};
// Output descriptor is filled in by the C component on success.
let mut out_frame = ffi::EspH264DecOutFrame {
frame_type: -1,
outbuf: core::ptr::null_mut(),
out_size: 0,
dts: 0,
pts: 0,
};
// Feed exactly one NAL unit; the component decodes synchronously.
let ret = unsafe {
ffi::esp_h264_dec_process(self.handle, &mut in_frame, &mut out_frame)
};
if ret != ffi::ESP_H264_ERR_OK {
// Decode errors are non-fatal: report "no frame" and let the stream
// recover at the next keyframe instead of tearing the session down.
log::warn!("esp_h264_dec_process error: {}", ret);
return Ok(false);
}
// SPS / PPS / SEI — no image data, completely normal
if out_frame.out_size == 0 || out_frame.outbuf.is_null() {
return Ok(false);
}
// outbuf points to component-owned I420 data, valid until next call
let i420 = unsafe {
core::slice::from_raw_parts(out_frame.outbuf, out_frame.out_size as usize)
};
// If caller passed an empty buffer (discard mode), skip conversion
if rgb565_out.is_empty() {
self.frames_decoded += 1;
return Ok(true);
}
// Convert + downscale I420 → RGB565 directly into the caller's buffer;
// caller guarantees it holds target_width * target_height pixels.
i420_to_rgb565_downscale(
i420,
self.config.source_width,
self.config.source_height,
rgb565_out,
self.config.target_width,
self.config.target_height,
);
self.frames_decoded += 1;
// Emit heap headroom stats every 30th decoded frame.
if self.frames_decoded % 30 == 1 {
let free_psram = unsafe {
esp_idf_sys::heap_caps_get_free_size(esp_idf_sys::MALLOC_CAP_SPIRAM)
};
let free_internal = unsafe {
esp_idf_sys::heap_caps_get_free_size(esp_idf_sys::MALLOC_CAP_INTERNAL)
};
log::info!(
"🎬 Frame #{} decoded ({}×{} I420 → {}×{} RGB565, {} KB fed, PSRAM {}KB free, DRAM {}KB free)",
self.frames_decoded,
self.config.source_width, self.config.source_height,
self.config.target_width, self.config.target_height,
self.total_bytes_fed / 1024,
free_psram / 1024,
free_internal / 1024,
);
}
Ok(true)
}
/// Decode one H.264 NAL unit and return a pointer to the raw I420 output.
///
/// Returns `Ok(Some((ptr, len)))` when an image frame was decoded.
/// Returns `Ok(None)` for SPS / PPS / SEI NALs.
///
/// # Safety contract
/// The returned pointer is component-owned and valid ONLY until the next
/// `decode_raw` / `decode_into` call. The caller must consume the data
/// before calling any other decode method.
pub fn decode_raw(&mut self, h264_data: &[u8]) -> Result<Option<(*const u8, usize)>> {
self.total_bytes_fed += h264_data.len();
// Build input frame pointing directly at the caller's slice.
// The C API reads (does not write) this buffer, so casting away
// const is safe for the duration of the synchronous FFI call.
let mut in_frame = ffi::EspH264DecInFrame {
raw_data: ffi::EspH264Pkt {
buffer: h264_data.as_ptr() as *mut u8,
@@ -284,40 +320,12 @@ impl H264Decoder {
return Ok(None);
}
// SPS / PPS / SEI — no image data, completely normal
if out_frame.out_size == 0 || out_frame.outbuf.is_null() {
return Ok(None);
}
// outbuf points to component-owned I420 data, valid until next call
let i420 = unsafe {
core::slice::from_raw_parts(out_frame.outbuf, out_frame.out_size as usize)
};
let pixels = (self.config.target_width * self.config.target_height) as usize;
let rgb565 = self.rgb565_buf.as_u16_mut_slice(pixels);
i420_to_rgb565_downscale(
i420,
self.config.source_width,
self.config.source_height,
rgb565,
self.config.target_width,
self.config.target_height,
);
self.frames_decoded += 1;
if self.frames_decoded % 30 == 1 {
log::info!(
"🎬 Frame #{} decoded ({}×{} I420 → {}×{} RGB565, {} KB fed total)",
self.frames_decoded,
self.config.source_width, self.config.source_height,
self.config.target_width, self.config.target_height,
self.total_bytes_fed / 1024,
);
}
Ok(Some(self.rgb565_buf.as_u16_slice(pixels)))
Ok(Some((out_frame.outbuf as *const u8, out_frame.out_size as usize)))
}
/// Total image frames decoded successfully.
@@ -326,6 +334,10 @@ impl H264Decoder {
/// Total bytes of H.264 data fed so far (used for logging in session.rs).
pub fn nal_len(&self) -> usize { self.total_bytes_fed }
/// Source video dimensions (what Android Auto sends).
pub fn source_width(&self) -> u32 { self.config.source_width }
pub fn source_height(&self) -> u32 { self.config.source_height }
/// Output framebuffer dimensions (width, height).
pub fn output_dimensions(&self) -> (u32, u32) {
(self.config.target_width, self.config.target_height)
@@ -348,17 +360,63 @@ impl Drop for H264Decoder {
// I420 → RGB565 conversion with nearest-neighbor downscaling
// ---------------------------------------------------------------------------
// YUV→RGB565 lookup tables. Precomputed once, reused every frame.
// These replace per-pixel multiply + clamp with a single table lookup.
//
// For each of the 256 possible Y/U/V byte values, we precompute the
// partial contribution to R, G, B scaled to 16-bit fixed-point.
// At conversion time: r = Y_R[y] + V_R[v] (then clamp and shift)
//
// Table sizes: 4 tables × 256 entries × 4 bytes = 4 KB total.
use std::sync::OnceLock;
struct YuvLut {
y_r: [i16; 256], // Y contribution to R (and B) — just Y itself
v_r: [i16; 256], // V contribution to R: 1.370705 * (V-128)
uv_g: [i16; 512], // combined U+V contribution to G (indexed by u*256+v would be huge)
u_b: [i16; 256], // U contribution to B: 1.732446 * (U-128)
// For G we precompute per-UV pair: -0.698001*(V-128) - 0.337633*(U-128)
// But that's 64K entries. Instead store components separately:
v_g: [i16; 256], // V contribution to G: -0.698001 * (V-128)
u_g: [i16; 256], // U contribution to G: -0.337633 * (U-128)
}
static YUV_LUT: OnceLock<YuvLut> = OnceLock::new();
fn get_yuv_lut() -> &'static YuvLut {
YUV_LUT.get_or_init(|| {
let mut lut = YuvLut {
y_r: [0; 256],
v_r: [0; 256],
uv_g: [0; 512], // unused, kept for alignment
u_b: [0; 256],
v_g: [0; 256],
u_g: [0; 256],
};
for i in 0..256 {
lut.y_r[i] = i as i16;
let v = i as i16 - 128;
let u = i as i16 - 128;
lut.v_r[i] = ((351 * v as i32) >> 8) as i16;
lut.v_g[i] = ((179 * v as i32) >> 8) as i16; // positive; subtracted later
lut.u_g[i] = ((86 * u as i32) >> 8) as i16; // positive; subtracted later
lut.u_b[i] = ((443 * u as i32) >> 8) as i16;
}
lut
})
}
/// Saturate a fixed-point color term to the displayable [0, 255] range.
///
/// Used by the YUV→RGB hot loops; the result is widened to `u16` so it can
/// be shifted into an RGB565 word without a second cast.
#[inline(always)]
fn clamp8(v: i16) -> u16 {
    // `Ord::clamp` is the idiomatic form of `.max(0).min(255)` and compiles
    // to the same branch-free saturation.
    v.clamp(0, 255) as u16
}
/// Convert I420 (YUV 4:2:0 planar) to RGB565 with nearest-neighbor downscaling.
///
/// I420 layout:
/// Y plane: src_w × src_h bytes (one byte per pixel)
/// U plane: (src_w/2) × (src_h/2) bytes
/// V plane: (src_w/2) × (src_h/2) bytes
///
/// RGB565 layout: 16-bit per pixel, [RRRRRGGG_GGGBBBBB]
///
/// Nearest-neighbor scaling maps each output pixel to the closest input pixel,
/// which is fast and sufficient for a 480×320 embedded display.
/// Optimized for ESP32-S3: uses precomputed LUTs, processes 2 pixels per UV pair
/// when possible, and minimizes cache pressure by scanning linearly.
pub fn i420_to_rgb565_downscale(
i420: &[u8],
src_w: u32,
@@ -367,6 +425,7 @@ pub fn i420_to_rgb565_downscale(
dst_w: u32,
dst_h: u32,
) {
let lut = get_yuv_lut();
let y_plane = &i420[..(src_w * src_h) as usize];
let u_offset = (src_w * src_h) as usize;
let v_offset = u_offset + (src_w * src_h / 4) as usize;
@@ -375,7 +434,6 @@ pub fn i420_to_rgb565_downscale(
let uv_stride = (src_w / 2) as usize;
// Precompute horizontal and vertical mapping tables
// This avoids repeated division in the inner loop
let x_map: Vec<u32> = (0..dst_w)
.map(|dx| dx * src_w / dst_w)
.collect();
@@ -383,28 +441,26 @@ pub fn i420_to_rgb565_downscale(
.map(|dy| dy * src_h / dst_h)
.collect();
for dy in 0..dst_h {
let src_y = y_map[dy as usize];
let dst_row_offset = (dy * dst_w) as usize;
let y_row_offset = (src_y * src_w) as usize;
let uv_row = (src_y / 2) as usize;
for dy in 0..dst_h as usize {
let src_y = y_map[dy] as usize;
let dst_off = dy * dst_w as usize;
let y_off = src_y * src_w as usize;
let uv_row = src_y / 2;
let uv_off = uv_row * uv_stride;
for dx in 0..dst_w {
let src_x = x_map[dx as usize];
for dx in 0..dst_w as usize {
let src_x = x_map[dx] as usize;
let uv_x = src_x / 2;
// Fetch YUV values
let y_val = y_plane[y_row_offset + src_x as usize] as i32;
let u_val = u_plane[uv_row * uv_stride + (src_x / 2) as usize] as i32 - 128;
let v_val = v_plane[uv_row * uv_stride + (src_x / 2) as usize] as i32 - 128;
let y_val = lut.y_r[y_plane[y_off + src_x] as usize];
let u_idx = u_plane[uv_off + uv_x] as usize;
let v_idx = v_plane[uv_off + uv_x] as usize;
// YUV → RGB (BT.601 standard, integer math)
let r = (y_val + ((351 * v_val) >> 8)).clamp(0, 255) as u16;
let g = (y_val - ((179 * v_val + 86 * u_val) >> 8)).clamp(0, 255) as u16;
let b = (y_val + ((443 * u_val) >> 8)).clamp(0, 255) as u16;
let r = clamp8(y_val + lut.v_r[v_idx]);
let g = clamp8(y_val - lut.v_g[v_idx] - lut.u_g[u_idx]);
let b = clamp8(y_val + lut.u_b[u_idx]);
// Pack RGB565: RRRRR_GGGGGG_BBBBB
rgb565[dst_row_offset + dx as usize] =
((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
rgb565[dst_off + dx] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
}
}
}
@@ -420,6 +476,71 @@ pub fn i420_to_rgb565(
i420_to_rgb565_downscale(i420, width, height, rgb565, width, height);
}
/// Convert a horizontal strip of I420 to RGB565 with nearest-neighbor downscaling.
///
/// Only processes output rows `[dst_y_start .. dst_y_start + strip_h)` and writes
/// them into `out[0 .. dst_w * strip_h]`. Designed for direct-to-DMA conversion:
/// the caller provides an SRAM DMA staging buffer as `out`, so no PSRAM intermediate
/// is needed.
///
/// Uses unsafe pointer arithmetic + `get_unchecked` to eliminate bounds checks
/// in the hot inner loop — measurably faster on ESP32-S3. The asserts at the
/// top make every unchecked access provably in-bounds (the previous version
/// had undefined behavior if `dst_w > 480`: the stack `x_map` only holds 480
/// entries but was read unchecked for all `dst_w` columns).
///
/// # Panics
/// If `dst_w > 480`, if `i420` is smaller than a full Y+U+V frame of
/// `src_w × src_h`, or if `out` cannot hold the visible rows of the strip.
pub fn i420_to_rgb565_strip(
    i420: &[u8],
    src_w: u32,
    src_h: u32,
    dst_w: u32,
    dst_h: u32,
    dst_y_start: u32,
    strip_h: u32,
    out: &mut [u16],
) {
    let lut = get_yuv_lut();
    let src_pixels = (src_w * src_h) as usize;
    let dst_w_us = dst_w as usize;

    // Rows of this strip that are actually on screen (clipped at dst_h).
    let rows = (strip_h as usize).min(dst_h.saturating_sub(dst_y_start) as usize);

    // These checks make the `get_unchecked`/raw-pointer accesses below sound.
    assert!(dst_w_us <= 480, "dst_w {} exceeds x_map capacity (480)", dst_w);
    assert!(
        i420.len() >= src_pixels + src_pixels / 2,
        "i420 buffer too small for {}x{} frame",
        src_w,
        src_h
    );
    assert!(out.len() >= rows * dst_w_us, "output strip buffer too small");

    let y_plane = i420.as_ptr();
    // SAFETY: the length assert above guarantees Y (src_pixels bytes) plus the
    // two quarter-size chroma planes fit inside `i420`.
    let u_plane = unsafe { y_plane.add(src_pixels) };
    let v_plane = unsafe { u_plane.add(src_pixels / 4) };
    let uv_stride = (src_w / 2) as usize;

    // Precompute horizontal source-x mapping on the stack (display is ≤480 wide).
    let mut x_map = [0u16; 480];
    for dx in 0..dst_w_us {
        x_map[dx] = (dx as u32 * src_w / dst_w) as u16;
    }

    for dy_local in 0..rows {
        let dy = dst_y_start as usize + dy_local;
        // Nearest-neighbor vertical mapping; src_y < src_h because dy < dst_h.
        let src_y = (dy as u32 * src_h / dst_h) as usize;
        let y_row = unsafe { y_plane.add(src_y * src_w as usize) };
        let uv_row_off = (src_y / 2) * uv_stride;
        let u_row = unsafe { u_plane.add(uv_row_off) };
        let v_row = unsafe { v_plane.add(uv_row_off) };
        let out_off = dy_local * dst_w_us;
        for dx in 0..dst_w_us {
            // SAFETY: dx < dst_w_us <= 480 keeps x_map in-bounds; the mapping
            // gives src_x < src_w and uv_x < src_w/2, so the plane reads stay
            // inside the asserted i420 length; out_off + dx < rows * dst_w_us
            // <= out.len(); every LUT index is a byte, and each table has 256
            // entries.
            unsafe {
                let src_x = *x_map.get_unchecked(dx) as usize;
                let uv_x = src_x >> 1;
                let y_val = *lut.y_r.get_unchecked(*y_row.add(src_x) as usize);
                let u_idx = *u_row.add(uv_x) as usize;
                let v_idx = *v_row.add(uv_x) as usize;
                let r = clamp8(y_val + *lut.v_r.get_unchecked(v_idx));
                let g = clamp8(y_val - *lut.v_g.get_unchecked(v_idx) - *lut.u_g.get_unchecked(u_idx));
                let b = clamp8(y_val + *lut.u_b.get_unchecked(u_idx));
                *out.get_unchecked_mut(out_off + dx) = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
            }
        }
    }
}
// ---------------------------------------------------------------------------
// Bilinear downscaling (higher quality, more CPU)
// ---------------------------------------------------------------------------
+313 -23
View File
@@ -16,9 +16,9 @@ pub const DISPLAY_WIDTH: u16 = 480;
pub const DISPLAY_HEIGHT: u16 = 320;
pub const DISPLAY_PIXELS: usize = DISPLAY_WIDTH as usize * DISPLAY_HEIGHT as usize;
/// Height of each DMA strip (lines). Tradeoff: larger = fewer DMA calls, more SRAM.
/// 20 lines × 480 × 2 = 19,200 bytes of internal SRAM.
const STRIP_LINES: usize = 20;
/// Height of each DMA strip (lines). Larger = fewer DMA calls but more SRAM.
/// 40 lines × 480 × 2 = 38,400 bytes. With double-buffer (2 strips), ~76.8 KB internal SRAM.
pub const STRIP_LINES: usize = 40;
const STRIP_BYTES: usize = DISPLAY_WIDTH as usize * STRIP_LINES * 2;
// WT32-SC01 Plus GPIO pin assignments
@@ -39,10 +39,9 @@ const PIN_BACKLIGHT: i32 = 45;
pub struct Display {
panel_handle: sys::esp_lcd_panel_handle_t,
io_handle: sys::esp_lcd_panel_io_handle_t,
/// DMA staging buffer in internal SRAM (NOT PSRAM).
/// PSRAM data can't be DMA'd directly on ESP32-S3 I80 bus reliably.
/// We copy strip-by-strip: PSRAM → stage → DMA → display.
dma_stage: *mut u8,
/// Double DMA staging buffers in internal SRAM (NOT PSRAM).
/// While DMA sends buf[0], CPU fills buf[1], then swap.
dma_stage: [*mut u8; 2],
}
// SAFETY: The display handles are used from a single thread (video_display_loop)
@@ -79,18 +78,22 @@ impl Display {
// Create and initialize ST7796 panel
let panel_handle = Self::init_panel(io_handle)?;
// Allocate DMA staging buffer in internal SRAM
let dma_stage = sys::heap_caps_malloc(
// Allocate double DMA staging buffers in internal SRAM
let dma_stage_0 = sys::heap_caps_malloc(
STRIP_BYTES,
sys::MALLOC_CAP_DMA | sys::MALLOC_CAP_INTERNAL,
) as *mut u8;
if dma_stage.is_null() {
bail!("Failed to allocate {}B DMA stage buffer in internal SRAM", STRIP_BYTES);
let dma_stage_1 = sys::heap_caps_malloc(
STRIP_BYTES,
sys::MALLOC_CAP_DMA | sys::MALLOC_CAP_INTERNAL,
) as *mut u8;
if dma_stage_0.is_null() || dma_stage_1.is_null() {
bail!("Failed to allocate {}B ×2 DMA stage buffers in internal SRAM", STRIP_BYTES);
}
log::info!("ST7796 display initialized ({}×{}, DMA stage {}B)", DISPLAY_WIDTH, DISPLAY_HEIGHT, STRIP_BYTES);
log::info!("ST7796 display initialized ({}×{}, DMA stage {}B×2)", DISPLAY_WIDTH, DISPLAY_HEIGHT, STRIP_BYTES);
Ok(Self { panel_handle, io_handle, dma_stage })
Ok(Self { panel_handle, io_handle, dma_stage: [dma_stage_0, dma_stage_1] })
}
}
@@ -102,26 +105,31 @@ impl Display {
pub fn draw_rgb565(&self, data: &[u8]) {
debug_assert_eq!(data.len(), DISPLAY_PIXELS * 2);
let mut buf_idx = 0usize;
for y in (0..DISPLAY_HEIGHT as usize).step_by(STRIP_LINES) {
let h = STRIP_LINES.min(DISPLAY_HEIGHT as usize - y);
let offset = y * DISPLAY_WIDTH as usize * 2;
let len = DISPLAY_WIDTH as usize * h * 2;
// Copy from PSRAM → internal SRAM staging buffer
// With trans_queue_depth=2, the previous strip's DMA may still
// be in flight on the OTHER buffer — that's fine, we write to
// the alternating buffer.
unsafe {
ptr::copy_nonoverlapping(
data[offset..].as_ptr(),
self.dma_stage,
self.dma_stage[buf_idx],
len,
);
Self::flush_pixels(
self.io_handle,
0, y as u16,
DISPLAY_WIDTH, (y + h) as u16,
self.dma_stage,
self.dma_stage[buf_idx],
len,
);
}
buf_idx ^= 1; // alternate between buf 0 and 1
}
}
@@ -139,13 +147,56 @@ impl Display {
}
/// Fill entire screen with a solid RGB565 color.
/// Get a mutable u16 slice into one of the DMA staging buffers.
///
/// `idx` must be 0 or 1. The returned slice has `DISPLAY_WIDTH * STRIP_LINES`
/// entries. Callers can write RGB565 pixels directly here, then call
/// `flush_strip()` to DMA the data to the LCD — zero-copy from conversion.
///
/// # Safety
/// The caller must ensure the previous DMA transfer using this buffer index
/// has completed. With `trans_queue_depth=2` and alternating indices, the
/// blocking `flush_strip` call on the OTHER buffer guarantees this.
pub fn dma_stage_mut(&self, idx: usize) -> &mut [u16] {
debug_assert!(idx < 2);
unsafe {
std::slice::from_raw_parts_mut(
self.dma_stage[idx] as *mut u16,
DISPLAY_WIDTH as usize * STRIP_LINES,
)
}
}
/// Push one strip from a DMA staging buffer to the LCD.
///
/// `y`: starting row on the display.
/// `h`: number of rows in this strip.
/// `buf_idx`: which DMA staging buffer (0 or 1) holds the data.
///
/// With `trans_queue_depth=2` this may block until a previous transfer
/// completes, providing the back-pressure needed for safe double-buffering.
pub fn flush_strip(&self, y: u16, h: u16, buf_idx: usize) {
let len = DISPLAY_WIDTH as usize * h as usize * 2;
unsafe {
Self::flush_pixels(
self.io_handle,
0, y,
DISPLAY_WIDTH, y + h,
self.dma_stage[buf_idx],
len,
);
}
}
pub fn fill_color(&self, color: u16) {
// Fill the DMA staging buffer with the color
// Fill both DMA staging buffers with the color
let pixels = DISPLAY_WIDTH as usize * STRIP_LINES;
unsafe {
let stage_u16 = self.dma_stage as *mut u16;
for i in 0..pixels {
*stage_u16.add(i) = color;
for buf in &self.dma_stage {
let stage_u16 = *buf as *mut u16;
for i in 0..pixels {
*stage_u16.add(i) = color;
}
}
}
@@ -157,13 +208,18 @@ impl Display {
self.io_handle,
0, y,
DISPLAY_WIDTH, y + h,
self.dma_stage,
self.dma_stage[0],
len,
);
}
}
}
// --- Text rendering (nav-only mode) ---
// Removed: per-character DMA methods caused race conditions (garbled text).
// Use the free functions render_text_to_strip() and render_image_to_strip()
// below, which composite into a full strip buffer before a single DMA push.
// --- Private initialization helpers ---
unsafe fn init_i80_bus() -> Result<sys::esp_lcd_i80_bus_handle_t> {
@@ -198,7 +254,7 @@ impl Display {
let mut io_config: sys::esp_lcd_panel_io_i80_config_t = std::mem::zeroed();
io_config.cs_gpio_num = -1;
io_config.pclk_hz = 40_000_000; // 40MHz
io_config.trans_queue_depth = 1; // Blocking: wait for DMA to finish before reusing stage buffer
io_config.trans_queue_depth = 2; // Double-buffer: 1 in-flight DMA + 1 being filled by CPU
io_config.lcd_cmd_bits = 8;
io_config.lcd_param_bits = 8;
io_config.on_color_trans_done = None;
@@ -343,8 +399,242 @@ impl Display {
impl Drop for Display {
fn drop(&mut self) {
if !self.dma_stage.is_null() {
unsafe { sys::heap_caps_free(self.dma_stage as *mut c_void); }
for buf in &self.dma_stage {
if !buf.is_null() {
unsafe { sys::heap_caps_free(*buf as *mut c_void); }
}
}
}
}
// ---------------------------------------------------------------------------
// Strip-based rendering functions (nav-only mode)
//
// These write directly into a provided strip buffer (e.g. a DMA staging buf).
// The caller fills the strip, then calls lcd.flush_strip() ONCE — this avoids
// the DMA race condition that caused garbled text with per-character rendering.
// ---------------------------------------------------------------------------
/// Render text into a horizontal strip buffer.
///
/// - `buf`: strip pixel buffer, width × strip_h pixels (RGB565).
/// - `buf_w`: width of the buffer (DISPLAY_WIDTH).
/// - `strip_y`: global Y of the first row in this strip.
/// - `strip_h`: height of this strip in pixels.
/// - `text`: ASCII string to render.
/// - `text_x`, `text_y`: global X, Y position of the text top-left.
/// - `color`: RGB565 foreground color (background is left untouched).
/// - `scale`: pixel multiplier (1=tiny, 3=normal, 5=large).
#[cfg(feature = "nav-only")]
pub fn render_text_to_strip(
    buf: &mut [u16],
    buf_w: u16,
    strip_y: u16,
    strip_h: u16,
    text: &str,
    text_x: u16,
    text_y: u16,
    color: u16,
    scale: u16,
) {
    let char_w = 5 * scale;
    let char_h = 7 * scale;
    let gap = scale;
    // Quick reject: does the text row overlap this strip at all?
    if text_y + char_h <= strip_y || text_y >= strip_y + strip_h {
        return;
    }
    // The visible glyph-row range depends only on text_y and the strip,
    // not on the character — hoist it out of the per-character loop
    // (was recomputed for every char).
    let vis_y_start = text_y.max(strip_y);
    let vis_y_end = (text_y + char_h).min(strip_y + strip_h);
    let mut cx = text_x;
    for &ch in text.as_bytes() {
        // Stop once we run off the right edge of the buffer.
        if cx >= buf_w { break; }
        // Non-printable bytes still advance the cursor (render as blank).
        if ch < 32 || ch > 127 { cx += char_w + gap; continue; }
        let glyph = &FONT_5X7[(ch - 32) as usize];
        for gy in vis_y_start..vis_y_end {
            // Map global Y back to an unscaled glyph row (0..7).
            let glyph_row = ((gy - text_y) / scale) as usize;
            if glyph_row >= 7 { break; }
            let bits = glyph[glyph_row];
            let buf_row = (gy - strip_y) as usize;
            for col in 0..5u16 {
                // Bit 4 is the leftmost pixel of the 5-wide glyph row.
                if (bits >> (4 - col)) & 1 != 0 {
                    // Horizontal scaling: repeat each set pixel `scale` times.
                    for sx in 0..scale {
                        let px = (cx + col * scale + sx) as usize;
                        if px < buf_w as usize {
                            buf[buf_row * buf_w as usize + px] = color;
                        }
                    }
                }
            }
        }
        cx += char_w + gap;
    }
}
/// Render text horizontally centered at the given Y.
#[cfg(feature = "nav-only")]
pub fn render_text_centered_to_strip(
    buf: &mut [u16],
    buf_w: u16,
    strip_y: u16,
    strip_h: u16,
    text: &str,
    text_y: u16,
    color: u16,
    scale: u16,
) {
    // Each glyph cell advances 6*scale pixels: 5 px glyph + 1 px gap.
    let total_w = text.len() as u16 * (6 * scale);
    // Center horizontally; text wider than the buffer starts at x = 0.
    let start_x = buf_w.saturating_sub(total_w) / 2;
    render_text_to_strip(buf, buf_w, strip_y, strip_h, text, start_x, text_y, color, scale);
}
/// Blit a pre-decoded RGB565 image into a strip buffer.
///
/// `img`: RGB565 pixels, row-major, `img_w × img_h`.
/// `img_x`, `img_y`: global position on screen where the image top-left goes.
/// Only the rows overlapping the current strip are copied.
#[cfg(feature = "nav-only")]
pub fn render_image_to_strip(
    buf: &mut [u16],
    buf_w: u16,
    strip_y: u16,
    strip_h: u16,
    img: &[u16],
    img_w: u16,
    img_h: u16,
    img_x: u16,
    img_y: u16,
) {
    // Reject images lying entirely outside this strip or off-screen right.
    if img_y + img_h <= strip_y || img_y >= strip_y + strip_h || img_x >= buf_w {
        return;
    }
    // Horizontal clip: number of source columns that fit on screen.
    let cols = img_w.min(buf_w - img_x) as usize;
    if cols == 0 {
        return;
    }
    // Vertical clip: overlap between the image rows and this strip.
    let first_row = img_y.max(strip_y);
    let last_row = (img_y + img_h).min(strip_y + strip_h);
    let row_count = (last_row - first_row) as usize;
    let dst_x = img_x as usize;
    // Walk destination and source row-by-row in lockstep.
    let dst_rows = buf
        .chunks_mut(buf_w as usize)
        .skip((first_row - strip_y) as usize);
    let src_rows = img
        .chunks(img_w as usize)
        .skip((first_row - img_y) as usize);
    for (dst, src) in dst_rows.zip(src_rows).take(row_count) {
        dst[dst_x..dst_x + cols].copy_from_slice(&src[..cols]);
    }
}
// ---------------------------------------------------------------------------
// 5×7 bitmap font — ASCII 32 (' ') through 127 ('~'+DEL)
//
// Indexed as FONT_5X7[ascii_code - 32]; callers must reject bytes < 32 or
// > 127 before indexing (96 entries total).
// Each glyph is 7 rows of 5 bits packed in a u8 (MSB = leftmost pixel).
// Example: 'A' = [0x0E, 0x11, 0x11, 0x1F, 0x11, 0x11, 0x11]
//          .###. #...# #...# ##### #...# #...# #...#
// ---------------------------------------------------------------------------
#[cfg(feature = "nav-only")]
#[rustfmt::skip]
static FONT_5X7: [[u8; 7]; 96] = [
    [0x00,0x00,0x00,0x00,0x00,0x00,0x00], // 32 ' '
    [0x04,0x04,0x04,0x04,0x04,0x00,0x04], // 33 '!'
    [0x0A,0x0A,0x00,0x00,0x00,0x00,0x00], // 34 '"'
    [0x0A,0x1F,0x0A,0x0A,0x1F,0x0A,0x00], // 35 '#'
    [0x04,0x0F,0x14,0x0E,0x05,0x1E,0x04], // 36 '$'
    [0x18,0x19,0x02,0x04,0x08,0x13,0x03], // 37 '%'
    [0x08,0x14,0x14,0x08,0x15,0x12,0x0D], // 38 '&'
    [0x04,0x04,0x00,0x00,0x00,0x00,0x00], // 39 '''
    [0x02,0x04,0x08,0x08,0x08,0x04,0x02], // 40 '('
    [0x08,0x04,0x02,0x02,0x02,0x04,0x08], // 41 ')'
    [0x00,0x04,0x15,0x0E,0x15,0x04,0x00], // 42 '*'
    [0x00,0x04,0x04,0x1F,0x04,0x04,0x00], // 43 '+'
    [0x00,0x00,0x00,0x00,0x00,0x04,0x08], // 44 ','
    [0x00,0x00,0x00,0x1F,0x00,0x00,0x00], // 45 '-'
    [0x00,0x00,0x00,0x00,0x00,0x00,0x04], // 46 '.'
    [0x01,0x02,0x02,0x04,0x08,0x08,0x10], // 47 '/'
    [0x0E,0x11,0x13,0x15,0x19,0x11,0x0E], // 48 '0'
    [0x04,0x0C,0x04,0x04,0x04,0x04,0x0E], // 49 '1'
    [0x0E,0x11,0x01,0x06,0x08,0x10,0x1F], // 50 '2'
    [0x0E,0x11,0x01,0x0E,0x01,0x11,0x0E], // 51 '3'
    [0x02,0x06,0x0A,0x12,0x1F,0x02,0x02], // 52 '4'
    [0x1F,0x10,0x1E,0x01,0x01,0x11,0x0E], // 53 '5'
    [0x06,0x08,0x10,0x1E,0x11,0x11,0x0E], // 54 '6'
    [0x1F,0x01,0x02,0x04,0x08,0x08,0x08], // 55 '7'
    [0x0E,0x11,0x11,0x0E,0x11,0x11,0x0E], // 56 '8'
    [0x0E,0x11,0x11,0x0F,0x01,0x02,0x0C], // 57 '9'
    [0x00,0x00,0x04,0x00,0x00,0x04,0x00], // 58 ':'
    [0x00,0x00,0x04,0x00,0x00,0x04,0x08], // 59 ';'
    [0x02,0x04,0x08,0x10,0x08,0x04,0x02], // 60 '<'
    [0x00,0x00,0x1F,0x00,0x1F,0x00,0x00], // 61 '='
    [0x08,0x04,0x02,0x01,0x02,0x04,0x08], // 62 '>'
    [0x0E,0x11,0x01,0x02,0x04,0x00,0x04], // 63 '?'
    [0x0E,0x11,0x17,0x15,0x17,0x10,0x0E], // 64 '@'
    [0x0E,0x11,0x11,0x1F,0x11,0x11,0x11], // 65 'A'
    [0x1E,0x11,0x11,0x1E,0x11,0x11,0x1E], // 66 'B'
    [0x0E,0x11,0x10,0x10,0x10,0x11,0x0E], // 67 'C'
    [0x1E,0x11,0x11,0x11,0x11,0x11,0x1E], // 68 'D'
    [0x1F,0x10,0x10,0x1E,0x10,0x10,0x1F], // 69 'E'
    [0x1F,0x10,0x10,0x1E,0x10,0x10,0x10], // 70 'F'
    [0x0E,0x11,0x10,0x17,0x11,0x11,0x0F], // 71 'G'
    [0x11,0x11,0x11,0x1F,0x11,0x11,0x11], // 72 'H'
    [0x0E,0x04,0x04,0x04,0x04,0x04,0x0E], // 73 'I'
    [0x07,0x02,0x02,0x02,0x02,0x12,0x0C], // 74 'J'
    [0x11,0x12,0x14,0x18,0x14,0x12,0x11], // 75 'K'
    [0x10,0x10,0x10,0x10,0x10,0x10,0x1F], // 76 'L'
    [0x11,0x1B,0x15,0x15,0x11,0x11,0x11], // 77 'M'
    [0x11,0x19,0x15,0x13,0x11,0x11,0x11], // 78 'N'
    [0x0E,0x11,0x11,0x11,0x11,0x11,0x0E], // 79 'O'
    [0x1E,0x11,0x11,0x1E,0x10,0x10,0x10], // 80 'P'
    [0x0E,0x11,0x11,0x11,0x15,0x12,0x0D], // 81 'Q'
    [0x1E,0x11,0x11,0x1E,0x14,0x12,0x11], // 82 'R'
    [0x0E,0x11,0x10,0x0E,0x01,0x11,0x0E], // 83 'S'
    [0x1F,0x04,0x04,0x04,0x04,0x04,0x04], // 84 'T'
    [0x11,0x11,0x11,0x11,0x11,0x11,0x0E], // 85 'U'
    [0x11,0x11,0x11,0x11,0x0A,0x0A,0x04], // 86 'V'
    [0x11,0x11,0x11,0x15,0x15,0x1B,0x11], // 87 'W'
    [0x11,0x11,0x0A,0x04,0x0A,0x11,0x11], // 88 'X'
    [0x11,0x11,0x0A,0x04,0x04,0x04,0x04], // 89 'Y'
    [0x1F,0x01,0x02,0x04,0x08,0x10,0x1F], // 90 'Z'
    [0x0E,0x08,0x08,0x08,0x08,0x08,0x0E], // 91 '['
    [0x10,0x08,0x08,0x04,0x02,0x02,0x01], // 92 '\'
    [0x0E,0x02,0x02,0x02,0x02,0x02,0x0E], // 93 ']'
    [0x04,0x0A,0x11,0x00,0x00,0x00,0x00], // 94 '^'
    [0x00,0x00,0x00,0x00,0x00,0x00,0x1F], // 95 '_'
    [0x08,0x04,0x00,0x00,0x00,0x00,0x00], // 96 '`'
    [0x00,0x00,0x0E,0x01,0x0F,0x11,0x0F], // 97 'a'
    [0x10,0x10,0x1E,0x11,0x11,0x11,0x1E], // 98 'b'
    [0x00,0x00,0x0E,0x11,0x10,0x11,0x0E], // 99 'c'
    [0x01,0x01,0x0F,0x11,0x11,0x11,0x0F], // 100 'd'
    [0x00,0x00,0x0E,0x11,0x1F,0x10,0x0E], // 101 'e'
    [0x06,0x08,0x1E,0x08,0x08,0x08,0x08], // 102 'f'
    [0x00,0x00,0x0F,0x11,0x0F,0x01,0x0E], // 103 'g'
    [0x10,0x10,0x1E,0x11,0x11,0x11,0x11], // 104 'h'
    [0x04,0x00,0x0C,0x04,0x04,0x04,0x0E], // 105 'i'
    [0x02,0x00,0x06,0x02,0x02,0x12,0x0C], // 106 'j'
    [0x10,0x10,0x12,0x14,0x18,0x14,0x12], // 107 'k'
    [0x0C,0x04,0x04,0x04,0x04,0x04,0x0E], // 108 'l'
    [0x00,0x00,0x1A,0x15,0x15,0x11,0x11], // 109 'm'
    [0x00,0x00,0x1E,0x11,0x11,0x11,0x11], // 110 'n'
    [0x00,0x00,0x0E,0x11,0x11,0x11,0x0E], // 111 'o'
    [0x00,0x00,0x1E,0x11,0x1E,0x10,0x10], // 112 'p'
    [0x00,0x00,0x0F,0x11,0x0F,0x01,0x01], // 113 'q'
    [0x00,0x00,0x16,0x19,0x10,0x10,0x10], // 114 'r'
    [0x00,0x00,0x0F,0x10,0x0E,0x01,0x1E], // 115 's'
    [0x08,0x08,0x1E,0x08,0x08,0x09,0x06], // 116 't'
    [0x00,0x00,0x11,0x11,0x11,0x13,0x0D], // 117 'u'
    [0x00,0x00,0x11,0x11,0x11,0x0A,0x04], // 118 'v'
    [0x00,0x00,0x11,0x11,0x15,0x15,0x0A], // 119 'w'
    [0x00,0x00,0x11,0x0A,0x04,0x0A,0x11], // 120 'x'
    [0x00,0x00,0x11,0x11,0x0F,0x01,0x0E], // 121 'y'
    [0x00,0x00,0x1F,0x02,0x04,0x08,0x1F], // 122 'z'
    [0x02,0x04,0x04,0x08,0x04,0x04,0x02], // 123 '{'
    [0x04,0x04,0x04,0x04,0x04,0x04,0x04], // 124 '|'
    [0x08,0x04,0x04,0x02,0x04,0x04,0x08], // 125 '}'
    [0x00,0x00,0x08,0x15,0x02,0x00,0x00], // 126 '~'
    [0x1F,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F], // 127 DEL (solid block)
];
+633 -66
View File
@@ -20,6 +20,7 @@ use std::time::Duration;
use anyhow::{Context, Result};
use esp_idf_hal::peripherals::Peripherals;
use esp_idf_svc::eventloop::EspSystemEventLoop;
use esp_idf_svc::handle::RawHandle;
use esp_idf_svc::nvs::EspDefaultNvsPartition;
use esp_idf_svc::wifi::{
AccessPointConfiguration, AuthMethod, BlockingWifi, Configuration, EspWifi,
@@ -31,6 +32,7 @@ mod channels;
mod common;
mod config;
mod control;
#[cfg(not(feature = "nav-only"))]
mod decoder;
mod display;
mod frame;
@@ -51,12 +53,17 @@ fn main() -> Result<()> {
esp_idf_svc::log::EspLogger::initialize_default();
log::info!("=== ESP32 Android Auto Navigation Head Unit ===");
#[cfg(feature = "nav-only")]
log::info!("Mode: NAV-ONLY (turn-by-turn text, no video decode)");
#[cfg(not(feature = "nav-only"))]
log::info!("Mode: FULL VIDEO (H.264 decode + display)");
// Check PSRAM availability
let free_psram = unsafe {
esp_idf_sys::heap_caps_get_free_size(esp_idf_sys::MALLOC_CAP_SPIRAM)
};
log::info!("PSRAM available: {} KB", free_psram / 1024);
#[cfg(not(feature = "nav-only"))]
if free_psram < 1024 * 1024 {
log::error!("PSRAM too small or not detected! Need >= 1MB, got {} KB", free_psram / 1024);
}
@@ -122,28 +129,49 @@ fn main() -> Result<()> {
// Channel for navigation events → UI thread
let (nav_tx, nav_rx) = mpsc::channel::<navigation::NavEvent>();
// Channel for decoded video frames → display thread
// Bounded to 1: decoder blocks until display takes the frame.
// Frames are 300KB each (PSRAM-backed), so we can't queue many.
let (video_tx, video_rx) = mpsc::sync_channel::<session::VideoFrame>(1);
// --- Video mode: decode+display thread owns LCD ---
#[cfg(not(feature = "nav-only"))]
let decode_tx = {
// Channel for raw H.264 NAL data → decode+display thread.
let (decode_tx, decode_rx) = mpsc::sync_channel::<Vec<u8>>(2);
// Spawn navigation UI thread
let _ui_thread = thread::Builder::new()
.name("nav-ui".into())
.stack_size(8192)
.spawn(move || {
nav_ui_loop(nav_rx);
})
.context("spawning UI thread")?;
// Spawn navigation UI thread (log-only in video mode — LCD is owned by video)
let _ui_thread = thread::Builder::new()
.name("nav-ui".into())
.stack_size(8192)
.spawn(move || {
nav_ui_loop(nav_rx);
})
.context("spawning UI thread")?;
// Spawn video display thread — owns the LCD
let _video_thread = thread::Builder::new()
.name("video-display".into())
.stack_size(8192)
.spawn(move || {
video_display_loop(video_rx, lcd);
})
.context("spawning video display thread")?;
// Spawn decode+display thread — owns both H.264 decoder and LCD.
let _decode_display_thread = thread::Builder::new()
.name("decode-display".into())
.stack_size(16384)
.spawn(move || {
decode_display_loop(decode_rx, lcd);
})
.context("spawning decode+display thread")?;
decode_tx
};
// --- Nav-only mode: nav UI thread owns LCD, no video ---
#[cfg(feature = "nav-only")]
let decode_tx = {
// Spawn nav UI thread — owns the LCD and renders turn-by-turn text
let _ui_thread = thread::Builder::new()
.name("nav-ui".into())
.stack_size(16384)
.spawn(move || {
nav_ui_render_loop(nav_rx, lcd);
})
.context("spawning nav UI thread")?;
// Dummy decode channel — session will skip video data
let (decode_tx, _decode_rx) = mpsc::sync_channel::<Vec<u8>>(0);
decode_tx
};
// Spawn touch polling thread — sends touch events to AA session
let (touch_tx, touch_rx) = mpsc::channel::<touch::TouchEvent>();
@@ -165,12 +193,19 @@ fn main() -> Result<()> {
&local_ip,
&mac,
&nav_tx,
&video_tx,
&decode_tx,
&touch_rx,
) {
log::error!("Connection cycle failed: {:?}", e);
log::info!("Restarting in 5 seconds...");
thread::sleep(Duration::from_secs(5));
// Connection reset / broken pipe are normal when phone disconnects
let msg = format!("{:?}", e);
if msg.contains("os error 104") || msg.contains("os error 32") || msg.contains("Broken pipe") {
log::info!("Phone disconnected (connection reset). Reconnecting...");
thread::sleep(Duration::from_secs(2));
} else {
log::error!("Connection cycle failed: {}", msg);
log::info!("Restarting in 5 seconds...");
thread::sleep(Duration::from_secs(5));
}
}
}
}
@@ -212,10 +247,56 @@ fn init_wifi_ap(
.context("setting WiFi AP config")?;
wifi.start().context("starting WiFi AP")?;
// Tell the DHCP server NOT to advertise a gateway or DNS.
// This makes the phone keep using 4G/mobile data for internet
// while using this WiFi link only for Android Auto.
disable_dhcp_gateway_dns(wifi.wifi().ap_netif());
log::info!("WiFi AP started successfully");
Ok(wifi)
}
/// Remove gateway and DNS from the AP's DHCP server so the phone keeps
/// using mobile data (4G/5G) for internet while connected to our AP.
///
/// Best-effort: every FFI return code is logged but never treated as fatal —
/// a failure here degrades to "phone routes internet over us" rather than
/// breaking the Android Auto link.
fn disable_dhcp_gateway_dns(netif: &esp_idf_svc::netif::EspNetif) {
    unsafe {
        let handle = netif.handle();
        // The DHCP server must be stopped before its options can be changed.
        let ret = esp_idf_sys::esp_netif_dhcps_stop(handle);
        log::info!("DHCP server stop: {}", ret);
        // Disable Router option (DHCP Option 3) — tells the DHCP server
        // to NOT include a gateway in offers. Android sees no default route
        // on this WiFi and keeps using mobile data for internet.
        // NOTE(review): the constant is named ROUTER_SOLICITATION_ADDRESS but
        // is presumably ESP-IDF's enum value for the router/gateway offer —
        // confirm against the esp_netif_dhcps_option documentation.
        let router_off: u8 = 0; // 0 = don't offer router
        let ret = esp_idf_sys::esp_netif_dhcps_option(
            handle,
            esp_idf_sys::esp_netif_dhcp_option_mode_t_ESP_NETIF_OP_SET,
            esp_idf_sys::esp_netif_dhcp_option_id_t_ESP_NETIF_ROUTER_SOLICITATION_ADDRESS,
            &router_off as *const u8 as *mut core::ffi::c_void,
            core::mem::size_of::<u8>() as u32,
        );
        log::info!("DHCP disable router option: {}", ret);
        // Disable DNS option (DHCP Option 6) — no DNS server advertised.
        let dns_off: u8 = 0;
        let ret = esp_idf_sys::esp_netif_dhcps_option(
            handle,
            esp_idf_sys::esp_netif_dhcp_option_mode_t_ESP_NETIF_OP_SET,
            esp_idf_sys::esp_netif_dhcp_option_id_t_ESP_NETIF_DOMAIN_NAME_SERVER,
            &dns_off as *const u8 as *mut core::ffi::c_void,
            core::mem::size_of::<u8>() as u32,
        );
        log::info!("DHCP disable DNS option: {}", ret);
        // Restart the DHCP server so the modified options take effect.
        let ret = esp_idf_sys::esp_netif_dhcps_start(handle);
        log::info!("DHCP server restart: {}", ret);
    }
    log::info!("DHCP server: gateway and DNS disabled (phone keeps 4G for internet)");
}
/// Read the AP interface MAC address as a formatted string.
fn get_ap_mac(wifi: &BlockingWifi<EspWifi<'static>>) -> String {
// Try to read MAC from the AP network interface
@@ -251,7 +332,7 @@ fn run_connection_cycle(
local_ip: &str,
mac: &str,
nav_tx: &mpsc::Sender<navigation::NavEvent>,
video_tx: &mpsc::SyncSender<session::VideoFrame>,
decode_tx: &mpsc::SyncSender<Vec<u8>>,
touch_rx: &mpsc::Receiver<touch::TouchEvent>,
) -> Result<()> {
log::info!(
@@ -280,8 +361,11 @@ fn run_connection_cycle(
log::info!("Phone connected to us from {}", peer_addr);
tcp_stream.set_nodelay(true)?;
tcp_stream.set_nonblocking(false)?;
// TCP keepalive: detect dead connections within ~30s
// (phone might silently disappear e.g. screen off / WiFi roam)
let _ = set_tcp_keepalive(&tcp_stream);
log::info!("Starting Android Auto protocol session...");
session::run_session(&mut tcp_stream, hu_config, nav_tx, video_tx, touch_rx)?;
session::run_session(&mut tcp_stream, hu_config, nav_tx, decode_tx, touch_rx)?;
log::info!("Session ended cleanly");
return Ok(());
}
@@ -299,8 +383,9 @@ fn run_connection_cycle(
) {
log::info!("Connected to phone's head unit server at {}", addr);
tcp_stream.set_nodelay(true)?;
let _ = set_tcp_keepalive(&tcp_stream);
log::info!("Starting Android Auto protocol session...");
session::run_session(&mut tcp_stream, hu_config, nav_tx, video_tx, touch_rx)?;
session::run_session(&mut tcp_stream, hu_config, nav_tx, decode_tx, touch_rx)?;
log::info!("Session ended cleanly");
return Ok(());
}
@@ -311,36 +396,71 @@ fn run_connection_cycle(
}
}
/// Navigation UI loop — receives NavEvents and updates the display.
/// Enable TCP keepalive to detect dead connections.
/// ESP-IDF lwIP supports SOL_SOCKET + SO_KEEPALIVE and TCP keepalive options.
///
/// Best-effort: a failed setsockopt is logged but never aborts the
/// connection — keepalive is a robustness aid, not a requirement.
fn set_tcp_keepalive(stream: &std::net::TcpStream) -> Result<()> {
    use std::os::fd::AsRawFd;
    let fd = stream.as_raw_fd();
    // One helper for all four i32 options instead of four copy-pasted
    // unsafe calls; also surfaces failures that were previously silently
    // discarded (lwip_setsockopt returns nonzero on error).
    let setopt = |level: i32, opt: i32, val: i32, name: &str| {
        // SAFETY: fd is a valid open socket for the lifetime of `stream`,
        // and we pass a pointer to a live i32 with its exact size (4 bytes).
        let ret = unsafe {
            esp_idf_sys::lwip_setsockopt(
                fd, level, opt,
                &val as *const i32 as *const std::ffi::c_void, 4,
            )
        };
        if ret != 0 {
            log::warn!("setsockopt({}) failed: {}", name, ret);
        }
    };
    // Enable keepalive probes on the connection.
    setopt(esp_idf_sys::SOL_SOCKET as i32, esp_idf_sys::SO_KEEPALIVE as i32, 1, "SO_KEEPALIVE");
    // Start probes after 10 seconds of idle.
    setopt(esp_idf_sys::IPPROTO_TCP as i32, esp_idf_sys::TCP_KEEPIDLE as i32, 10, "TCP_KEEPIDLE");
    // Probe every 5 seconds.
    setopt(esp_idf_sys::IPPROTO_TCP as i32, esp_idf_sys::TCP_KEEPINTVL as i32, 5, "TCP_KEEPINTVL");
    // Give up after 3 failed probes (dead after ~25s).
    setopt(esp_idf_sys::IPPROTO_TCP as i32, esp_idf_sys::TCP_KEEPCNT as i32, 3, "TCP_KEEPCNT");
    log::info!("TCP keepalive enabled (idle=10s, interval=5s, count=3)");
    Ok(())
}
/// Navigation UI loop — receives NavEvents and logs them (video mode).
/// In video mode, the LCD is owned by the decode+display thread.
#[cfg(not(feature = "nav-only"))]
fn nav_ui_loop(nav_rx: mpsc::Receiver<navigation::NavEvent>) {
log::info!("Navigation UI thread started");
log::info!("Navigation UI thread started (log-only, LCD owned by video)");
loop {
match nav_rx.recv() {
Ok(event) => {
match &event {
navigation::NavEvent::StatusChanged(status) => {
log::info!("🧭 Nav status: {:?}", status);
log::info!("Nav status: {:?}", status);
}
navigation::NavEvent::TurnInstruction(turn) => {
log::info!(
"🔄 Turn: {} — {:?} {:?} (image: {} bytes)",
"Turn: {} — {:?} {:?} (image: {} bytes)",
turn.street_name,
turn.direction,
turn.maneuver,
turn.turn_image.len()
);
// TODO: Render turn image + text on Slint UI
}
navigation::NavEvent::DistanceUpdate(dist) => {
log::info!(
"📏 Distance: {}m, {} {:?}, ETA: {}s",
"Distance: {}m, {} {:?}, ETA: {}s",
dist.meters,
dist.distance_to_step_millis,
dist.unit,
dist.time_to_step_seconds
);
// TODO: Update distance display on Slint UI
}
}
}
@@ -352,45 +472,492 @@ fn nav_ui_loop(nav_rx: mpsc::Receiver<navigation::NavEvent>) {
}
}
/// Video display loop — receives decoded RGB565 frames and sends them to the LCD.
fn video_display_loop(video_rx: mpsc::Receiver<session::VideoFrame>, lcd: display::Display) {
log::info!("Video display thread started");
/// Navigation UI render loop — receives NavEvents and displays turn-by-turn
/// info directly on the LCD (nav-only mode, no video decode).
///
/// Uses strip-based rendering: builds each 480×40 strip in memory, then
/// flushes once via DMA. Avoids per-character DMA race conditions.
/// Decodes PNG turn arrow images and blits them alongside text.
#[cfg(feature = "nav-only")]
fn nav_ui_render_loop(nav_rx: mpsc::Receiver<navigation::NavEvent>, lcd: display::Display) {
    log::info!("Nav UI render thread started (nav-only, strip-based)");
    // Removed unused `frame_count` counter (dead leftover from the video
    // display loop — it was initialized but never read or incremented).
    let mut state = NavDisplayState::default();
    // Paint the "waiting" screen immediately so the LCD is never blank.
    render_nav_screen(&lcd, &state);
    loop {
        // 5s timeout keeps the thread able to notice channel shutdown even
        // when no navigation events arrive for a long time.
        let event = nav_rx.recv_timeout(std::time::Duration::from_secs(5));
        match event {
            Ok(navigation::NavEvent::StatusChanged(status)) => {
                log::info!("Nav status: {:?}", status);
                match status {
                    navigation::NavStatus::Active => {
                        state.line_maneuver = "Navigating...".into();
                        state.line_street.clear();
                        state.needs_redraw = true;
                    }
                    navigation::NavStatus::Inactive => {
                        // Reset everything (drops any stale turn icon too).
                        state = NavDisplayState::default();
                        state.line_maneuver = "Navigation".into();
                        state.line_street = "inactive".into();
                        state.needs_redraw = true;
                    }
                    navigation::NavStatus::Rerouting => {
                        state.line_maneuver = "Rerouting...".into();
                        state.line_street.clear();
                        state.line_distance.clear();
                        state.line_eta.clear();
                        state.needs_redraw = true;
                    }
                    _ => {}
                }
            }
            Ok(navigation::NavEvent::TurnInstruction(turn)) => {
                state.line_maneuver = format_maneuver(turn.maneuver, turn.direction);
                state.line_street = turn.street_name.clone();
                // Decode the PNG turn arrow (typically 256×256) down to a
                // 64×64 RGB565 icon; keep the previous icon on decode failure.
                if !turn.turn_image.is_empty() {
                    state.turn_icon = decode_png_to_rgb565(&turn.turn_image, 64, 64);
                    if state.turn_icon.is_some() {
                        log::info!("Turn icon decoded (64x64)");
                    }
                }
                state.needs_redraw = true;
                log::info!("Turn: {} {:?} {:?}", turn.street_name, turn.direction, turn.maneuver);
            }
            Ok(navigation::NavEvent::DistanceUpdate(dist)) => {
                state.line_distance = format_distance(dist.meters, dist.unit);
                state.line_eta = if dist.time_to_step_seconds > 0 {
                    format!("ETA: {}", format_eta(dist.time_to_step_seconds))
                } else {
                    String::new()
                };
                state.needs_redraw = true;
            }
            // Timeout is normal — just loop and wait again.
            Err(mpsc::RecvTimeoutError::Timeout) => {}
            Err(mpsc::RecvTimeoutError::Disconnected) => {
                log::info!("Nav channel closed, UI thread exiting");
                break;
            }
        }
        // Redraw at most once per received event, after all state updates.
        if state.needs_redraw {
            render_nav_screen(&lcd, &state);
            state.needs_redraw = false;
        }
    }
}
// ---------------------------------------------------------------------------
// Nav-only display state + rendering
// ---------------------------------------------------------------------------
/// Mutable UI model for the nav-only screen. Updated by `NavEvent`s in
/// `nav_ui_render_loop` and flushed to the LCD by `render_nav_screen`
/// whenever `needs_redraw` is set.
#[cfg(feature = "nav-only")]
struct NavDisplayState {
    /// Top line: maneuver summary, e.g. "TURN RIGHT" or "Rerouting...".
    line_maneuver: String,
    /// Second line: target street name (may be empty).
    line_street: String,
    /// Large centered distance string, e.g. "400m" / "1.2km".
    line_distance: String,
    /// Centered "ETA: …" line; empty when no ETA is known.
    line_eta: String,
    /// 64×64 RGB565 decoded turn arrow (None if no image yet)
    turn_icon: Option<Vec<u16>>,
    /// Set after any state change; cleared once the screen is re-rendered.
    needs_redraw: bool,
}
#[cfg(feature = "nav-only")]
impl Default for NavDisplayState {
    /// Initial "waiting" screen shown before any navigation events arrive.
    fn default() -> Self {
        NavDisplayState {
            line_maneuver: String::from("Waiting for"),
            line_street: String::from("navigation..."),
            line_distance: String::new(),
            line_eta: String::new(),
            turn_icon: None,
            needs_redraw: true,
        }
    }
}
/// Strip-based full-screen compositing for the nav display.
///
/// Builds each STRIP_LINES-tall strip in a DMA staging buffer, then flushes
/// it once — alternating between two staging buffers so DMA of one strip
/// overlaps CPU compositing of the next.
///
/// Layout (480×320):
///   [64×64 icon at (16,8)]   maneuver text (y=16, scale 3, white)
///                            street name   (y=48, scale 3, cyan)
///               distance (y=140, scale 5, green, centered)
///               ETA      (y=220, scale 3, gray, centered)
#[cfg(feature = "nav-only")]
fn render_nav_screen(lcd: &display::Display, state: &NavDisplayState) {
    use display::{DISPLAY_WIDTH, DISPLAY_HEIGHT, STRIP_LINES,
        render_text_to_strip, render_text_centered_to_strip, render_image_to_strip};
    const W: u16 = DISPLAY_WIDTH;
    const STRIP: u16 = STRIP_LINES as u16;
    // Colors (RGB565)
    const WHITE: u16 = 0xFFFF;
    const CYAN: u16 = 0x07FF;
    const GREEN: u16 = 0x07E0;
    const GRAY: u16 = 0xC618;
    // Shift the top two text lines right when an icon occupies the left edge.
    let text_x: u16 = if state.turn_icon.is_some() { 96 } else { 16 };
    let mut buf_idx: usize = 0;
    for strip_y in (0..DISPLAY_HEIGHT).step_by(STRIP as usize) {
        // Last strip may be shorter than STRIP lines.
        let h = STRIP.min(DISPLAY_HEIGHT - strip_y);
        let buf = lcd.dma_stage_mut(buf_idx);
        // Clear strip to black
        for p in &mut buf[..W as usize * h as usize] {
            *p = 0x0000;
        }
        // Turn icon (64×64 at position 16, 8)
        if let Some(ref icon) = state.turn_icon {
            render_image_to_strip(buf, W, strip_y, h, icon, 64, 64, 16, 8);
        }
        // Maneuver (e.g. "TURN RIGHT") — scale 3, white
        render_text_to_strip(buf, W, strip_y, h, &state.line_maneuver, text_x, 16, WHITE, 3);
        // Street name — scale 3, cyan
        render_text_to_strip(buf, W, strip_y, h, &state.line_street, text_x, 48, CYAN, 3);
        // Distance — scale 5, green, centered
        render_text_centered_to_strip(buf, W, strip_y, h, &state.line_distance, 140, GREEN, 5);
        // ETA — scale 3, gray, centered
        render_text_centered_to_strip(buf, W, strip_y, h, &state.line_eta, 220, GRAY, 3);
        // Single DMA push per strip, then swap staging buffers.
        lcd.flush_strip(strip_y, h, buf_idx);
        buf_idx ^= 1;
    }
}
// ---------------------------------------------------------------------------
// Nav-only helper functions
// ---------------------------------------------------------------------------
#[cfg(feature = "nav-only")]
fn format_distance(meters: u32, unit: navigation::DistanceUnit) -> String {
    use navigation::DistanceUnit as U;
    // Partial variants format identically to their whole counterparts,
    // so the arms are merged with or-patterns.
    match unit {
        U::Kilometers | U::KilometersPartial => format!("{:.1}km", meters as f32 / 1000.0),
        U::Miles | U::MilesPartial => format!("{:.1}mi", meters as f32 / 1609.0),
        U::Feet => format!("{}ft", (meters as f32 * 3.281) as u32),
        U::Yards => format!("{}yd", (meters as f32 * 1.094) as u32),
        // Meters and any unrecognized unit fall back to plain meters.
        _ => format!("{}m", meters),
    }
}
#[cfg(feature = "nav-only")]
fn format_eta(seconds: u32) -> String {
    // Work in whole minutes: sub-minute ETAs show "<1 min",
    // under an hour shows minutes, otherwise "Xh Ymin".
    let minutes = seconds / 60;
    match minutes {
        0 => String::from("<1 min"),
        1..=59 => format!("{} min", minutes),
        _ => format!("{}h {}min", minutes / 60, minutes % 60),
    }
}
#[cfg(feature = "nav-only")]
fn format_maneuver(maneuver: navigation::ManeuverType, direction: navigation::ManeuverDirection) -> String {
    use navigation::{ManeuverDirection as D, ManeuverType as M};
    // Map the maneuver type to its display label (empty for unknown types).
    let man = match maneuver {
        M::Depart => "DEPART",
        M::NameChange => "CONTINUE",
        M::SlightTurn => "SLIGHT TURN",
        M::Turn => "TURN",
        M::SharpTurn => "SHARP TURN",
        M::UTurn => "U-TURN",
        M::OnRamp => "ON RAMP",
        M::OffRamp => "EXIT",
        M::Fork => "FORK",
        M::Merge => "MERGE",
        M::RoundaboutEnter => "ROUNDABOUT",
        M::RoundaboutExit => "EXIT ROUNDABOUT",
        M::RoundaboutEnterAndExit => "ROUNDABOUT",
        M::Straight => "STRAIGHT",
        M::FerryBoat => "FERRY",
        M::FerryTrain => "FERRY",
        M::Destination => "ARRIVE",
        _ => "",
    };
    // Optional direction suffix.
    let dir = match direction {
        D::Left => "LEFT",
        D::Right => "RIGHT",
        _ => "",
    };
    match dir {
        "" => man.to_owned(),
        d => format!("{} {}", man, d),
    }
}
// ---------------------------------------------------------------------------
// Minimal PNG decoder (RGB/RGBA 8-bit → downscaled RGB565)
//
// Handles the PNG subset that Android Auto sends for turn arrows:
// - 8-bit RGB or RGBA, non-interlaced
// - Applies PNG row filters (None, Sub, Up, Average, Paeth)
// - Downscales to target_w × target_h via nearest-neighbor
// - Uses miniz_oxide for zlib inflate (pure Rust, no C deps)
// ---------------------------------------------------------------------------
#[cfg(feature = "nav-only")]
fn decode_png_to_rgb565(png_data: &[u8], target_w: u32, target_h: u32) -> Option<Vec<u16>> {
    // NOTE(review): target_w/target_h must be > 0 (used as divisors below);
    // the visible caller passes 64×64 — confirm if new callers are added.
    // Validate PNG signature (8 fixed bytes); 33 = signature + IHDR chunk.
    if png_data.len() < 33 || &png_data[0..8] != b"\x89PNG\r\n\x1a\n" {
        log::warn!("PNG: invalid signature");
        return None;
    }
    // Parse IHDR (must be first chunk after signature)
    let ihdr_len = u32::from_be_bytes(png_data[8..12].try_into().ok()?) as usize;
    if &png_data[12..16] != b"IHDR" || ihdr_len != 13 {
        log::warn!("PNG: missing IHDR");
        return None;
    }
    // IHDR fields are big-endian at fixed offsets after the 16-byte prefix.
    let width = u32::from_be_bytes(png_data[16..20].try_into().ok()?);
    let height = u32::from_be_bytes(png_data[20..24].try_into().ok()?);
    let bit_depth = png_data[24];
    let color_type = png_data[25];
    let _compression = png_data[26];
    let _filter = png_data[27];
    let interlace = png_data[28];
    // Only 8-bit non-interlaced images are supported.
    if bit_depth != 8 || interlace != 0 {
        log::warn!("PNG: unsupported depth={} interlace={}", bit_depth, interlace);
        return None;
    }
    let channels: usize = match color_type {
        2 => 3, // RGB
        6 => 4, // RGBA (alpha channel is ignored during conversion below)
        _ => {
            log::warn!("PNG: unsupported color_type={}", color_type);
            return None;
        }
    };
    // Collect all IDAT chunk data (chunk CRCs are not verified — the data
    // arrives over a checksummed TCP link and a bad CRC would only garble
    // the icon, not crash).
    let mut idat_data = Vec::new();
    let mut pos = 8; // after signature
    while pos + 12 <= png_data.len() {
        let chunk_len = u32::from_be_bytes(png_data[pos..pos + 4].try_into().ok()?) as usize;
        let chunk_type = &png_data[pos + 4..pos + 8];
        // Truncated chunk — stop parsing rather than index out of bounds.
        if pos + 8 + chunk_len + 4 > png_data.len() { break; }
        if chunk_type == b"IDAT" {
            idat_data.extend_from_slice(&png_data[pos + 8..pos + 8 + chunk_len]);
        } else if chunk_type == b"IEND" {
            break;
        }
        pos += 8 + chunk_len + 4; // header(8) + data + crc(4)
    }
    if idat_data.is_empty() {
        log::warn!("PNG: no IDAT data");
        return None;
    }
    // Inflate (zlib format)
    let raw = miniz_oxide::inflate::decompress_to_vec_zlib(&idat_data).ok().or_else(|| {
        log::warn!("PNG: zlib inflate failed");
        None
    })?;
    let stride = width as usize * channels + 1; // filter byte + pixel data per row
    if raw.len() < stride * height as usize {
        log::warn!("PNG: inflated data too short ({} < {})", raw.len(), stride * height as usize);
        return None;
    }
    // Unfilter in-place (we need a mutable copy)
    let mut pixels = raw;
    png_unfilter(&mut pixels, width as usize, height as usize, channels);
    // Downscale + convert to RGB565 via nearest-neighbor sampling.
    let mut rgb565 = vec![0u16; (target_w * target_h) as usize];
    for dy in 0..target_h {
        let sy = (dy * height / target_h) as usize;
        let row_start = sy * stride + 1; // +1 to skip filter byte
        for dx in 0..target_w {
            let sx = (dx * width / target_w) as usize;
            let off = row_start + sx * channels;
            // Bounds guard: out-of-range samples leave the pixel black.
            if off + 2 < pixels.len() {
                let r = pixels[off] as u16;
                let g = pixels[off + 1] as u16;
                let b = pixels[off + 2] as u16;
                // RGB888 → RGB565: keep top 5/6/5 bits of each channel.
                rgb565[(dy * target_w + dx) as usize] = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
            }
        }
    }
    Some(rgb565)
}
/// Apply PNG row filters in-place.
/// Each row is: [filter_byte, pixel_data...] with `stride = width * channels + 1`.
#[cfg(feature = "nav-only")]
fn png_unfilter(data: &mut [u8], width: usize, height: usize, channels: usize) {
    let stride = width * channels + 1;
    let row_len = width * channels;
    // At 8 bits per channel, one "pixel unit" for filter distance is
    // `channels` bytes.
    let bpp = channels;
    for y in 0..height {
        // First data byte of this row; byte before it is the filter type.
        let base = y * stride + 1;
        // First data byte of the previous row, if any.
        let prev = y.checked_sub(1).map(|py| py * stride + 1);
        match data[base - 1] {
            // None — raw bytes pass through.
            0 => {}
            // Sub: each byte adds the byte one pixel to its left.
            1 => {
                for x in bpp..row_len {
                    data[base + x] = data[base + x].wrapping_add(data[base + x - bpp]);
                }
            }
            // Up: each byte adds the byte directly above.
            2 => {
                if let Some(p) = prev {
                    for x in 0..row_len {
                        data[base + x] = data[base + x].wrapping_add(data[p + x]);
                    }
                }
            }
            // Average: add floor((left + up) / 2); missing neighbors are 0.
            3 => {
                for x in 0..row_len {
                    let left = if x >= bpp { u16::from(data[base + x - bpp]) } else { 0 };
                    let up = prev.map_or(0, |p| u16::from(data[p + x]));
                    data[base + x] = data[base + x].wrapping_add(((left + up) / 2) as u8);
                }
            }
            // Paeth: add the predictor closest to left + up - upleft.
            4 => {
                for x in 0..row_len {
                    let a = if x >= bpp { i32::from(data[base + x - bpp]) } else { 0 };
                    let b = prev.map_or(0, |p| i32::from(data[p + x]));
                    let c = if x >= bpp {
                        prev.map_or(0, |p| i32::from(data[p + x - bpp]))
                    } else {
                        0
                    };
                    let p = a + b - c;
                    let (pa, pb, pc) = ((p - a).abs(), (p - b).abs(), (p - c).abs());
                    let pred = if pa <= pb && pa <= pc { a } else if pb <= pc { b } else { c };
                    data[base + x] = data[base + x].wrapping_add(pred as u8);
                }
            }
            // Unknown filter type — leave the row untouched.
            _ => {}
        }
    }
}
/// Decode + display loop — owns both the H.264 decoder and the LCD.
///
/// Only compiled in video mode (not nav-only).
#[cfg(not(feature = "nav-only"))]
fn decode_display_loop(decode_rx: mpsc::Receiver<Vec<u8>>, lcd: display::Display) {
log::info!("Decode+display thread started (strip-by-strip direct-to-DMA)");
let mut dec: Option<decoder::H264Decoder> = None;
let mut frame_count: u64 = 0;
let mut skip_count: u64 = 0;
let strip_h: u32 = display::STRIP_LINES as u32;
loop {
// Block for next NAL chunk
let mut data = match decode_rx.recv() {
Ok(d) => d,
Err(_) => {
log::info!("Decode channel closed, decode+display thread exiting");
return;
}
};
// Drain all queued chunks: decode each one to maintain H.264 state,
// but skip the expensive YUV→RGB565 conversion (discard mode).
// Only the latest chunk will get full conversion + display.
loop {
match decode_rx.try_recv() {
Ok(next) => {
if let Some(d) = &mut dec {
let _ = d.decode_into(&data, &mut []); // decode-only
skip_count += 1;
}
data = next;
}
Err(_) => break,
}
}
// Lazy init decoder on first data
if dec.is_none() {
match decoder::H264Decoder::new(decoder::DecoderConfig::default()) {
Ok(d) => {
log::info!("H.264 decoder initialized on decode+display thread");
dec = Some(d);
}
Err(e) => {
log::error!("Failed to init H.264 decoder: {:?}", e);
continue;
}
}
}
let d = dec.as_mut().unwrap();
// Decode the latest NAL → get raw I420 pointer
match d.decode_raw(&data) {
Ok(Some((i420_ptr, i420_len))) => {
frame_count += 1;
// Log every frame for now (test pattern is infrequent)
log::info!(
"🖥️ Display frame #{} ({} pixels)",
frame_count,
frame.pixels().len()
);
if frame_count % 60 == 1 {
let free_psram = unsafe {
esp_idf_sys::heap_caps_get_free_size(esp_idf_sys::MALLOC_CAP_SPIRAM)
};
let free_dram = unsafe {
esp_idf_sys::heap_caps_get_free_size(esp_idf_sys::MALLOC_CAP_INTERNAL)
};
log::info!(
"Frame #{} (skipped {}, PSRAM {}KB, DRAM {}KB free)",
frame_count, skip_count, free_psram / 1024, free_dram / 1024,
);
}
// Send RGB565 framebuffer to display
let bytes = unsafe {
std::slice::from_raw_parts(
frame.pixels().as_ptr() as *const u8,
frame.pixels().len() * 2,
)
};
lcd.draw_rgb565(bytes);
// SAFETY: I420 data is component-owned, valid until next decode call.
// We consume it fully here before the next loop iteration.
let i420 = unsafe { core::slice::from_raw_parts(i420_ptr, i420_len) };
let (dst_w, dst_h) = d.output_dimensions();
// Strip-by-strip: convert I420→RGB565 directly into DMA staging
// SRAM buffers and push to LCD. Alternating buffers overlap
// DMA transfer with CPU conversion (double-buffered pipeline).
let mut buf_idx: usize = 0;
for y in (0..dst_h).step_by(strip_h as usize) {
let h = strip_h.min(dst_h - y);
let dma_buf = lcd.dma_stage_mut(buf_idx);
decoder::i420_to_rgb565_strip(
i420,
d.source_width(), d.source_height(),
dst_w, dst_h,
y, h,
dma_buf,
);
lcd.flush_strip(y as u16, h as u16, buf_idx);
buf_idx ^= 1;
}
}
Err(mpsc::RecvTimeoutError::Timeout) => {
log::debug!("Video display: no frames for 10s (waiting...)");
}
Err(mpsc::RecvTimeoutError::Disconnected) => {
log::info!("Video frame channel closed, display thread exiting");
break;
Ok(None) => {} // SPS/PPS/SEI — no image data (normal at stream start)
Err(e) => {
log::warn!("H.264 decode error: {:?}", e);
}
}
}
@@ -401,7 +968,7 @@ fn touch_poll_loop(mut touch: touch::Touch<'static>, tx: mpsc::Sender<touch::Tou
log::info!("Touch polling thread started");
loop {
if let Some(event) = touch.poll() {
log::info!("👆 Touch: ({}, {}) pressed={}", event.x, event.y, event.pressed);
log::debug!("👆 Touch: ({}, {}) pressed={}", event.x, event.y, event.pressed);
let _ = tx.send(event);
}
std::thread::sleep(Duration::from_millis(33)); // ~30Hz
+39 -170
View File
@@ -9,7 +9,6 @@
use std::io::{Read, Write};
use std::sync::mpsc;
use std::thread;
use anyhow::{Context, Result, bail};
use protobuf::{Enum, Message};
@@ -18,65 +17,11 @@ use crate::channels;
use crate::common::CommonMessage;
use crate::config::HeadUnitConfig;
use crate::control::{self, ControlMessage};
use crate::decoder::{DecoderConfig, H264Decoder, DISPLAY_WIDTH, DISPLAY_HEIGHT};
use crate::frame::{self, Frame, FrameReader, TlsState};
use crate::navigation::{self, NavEvent};
use crate::proto::Wifi;
use crate::touch::TouchEvent;
/// A decoded video frame ready for display (RGB565).
/// Pixel data is allocated in PSRAM (not DRAM) to avoid OOM —
/// each frame is 480×320×2 = 300KB, far too large for ~300KB DRAM heap.
pub struct VideoFrame {
    /// PSRAM-allocated RGB565 pixel buffer (owned; freed in `Drop`).
    ptr: *mut u16,
    /// Number of pixels (DISPLAY_WIDTH × DISPLAY_HEIGHT).
    len: usize,
    /// Monotonic frame number.
    pub frame_number: u64,
}
// SAFETY: VideoFrame is only sent between threads via channel, never shared.
unsafe impl Send for VideoFrame {}
impl VideoFrame {
    /// Allocate a new frame buffer in PSRAM.
    ///
    /// # Errors
    /// Fails if the PSRAM heap cannot satisfy the allocation.
    pub fn new(frame_number: u64) -> anyhow::Result<Self> {
        let len = (DISPLAY_WIDTH * DISPLAY_HEIGHT) as usize;
        let ptr = unsafe {
            esp_idf_sys::heap_caps_malloc(
                len * 2, // 2 bytes per RGB565 pixel
                esp_idf_sys::MALLOC_CAP_SPIRAM,
            ) as *mut u16
        };
        if ptr.is_null() {
            anyhow::bail!("Failed to allocate VideoFrame in PSRAM ({} KB)", len * 2 / 1024);
        }
        Ok(Self { ptr, len, frame_number })
    }
    /// Read the pixel data.
    pub fn pixels(&self) -> &[u16] {
        // SAFETY: `ptr` is non-null (checked in `new`) and owns `len` u16s.
        unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
    }
    /// Copy decoded RGB565 data into this frame.
    ///
    /// # Panics
    /// Panics if `src.len() != self.len`. This must be a hard assert (not
    /// `debug_assert`): in release builds a mismatched source would otherwise
    /// read past `src` / write past the PSRAM buffer — undefined behavior.
    pub fn copy_from(&mut self, src: &[u16]) {
        assert_eq!(src.len(), self.len, "VideoFrame::copy_from length mismatch");
        // SAFETY: lengths verified equal above; buffers cannot overlap
        // (ours is a fresh PSRAM allocation).
        unsafe {
            std::ptr::copy_nonoverlapping(src.as_ptr(), self.ptr, self.len);
        }
    }
}
impl Drop for VideoFrame {
    fn drop(&mut self) {
        // Return the PSRAM buffer to the ESP-IDF capability heap.
        if !self.ptr.is_null() {
            unsafe { esp_idf_sys::heap_caps_free(self.ptr as *mut std::ffi::c_void); }
        }
    }
}
/// The channel IDs we assign during service discovery.
/// These must match the order we build channel descriptors.
#[derive(Debug, Clone, Copy)]
@@ -113,15 +58,17 @@ impl Default for ChannelMap {
/// Run the Android Auto protocol session over an established TCP stream.
///
/// `nav_tx` sends navigation events to the UI thread.
/// `video_tx` sends decoded RGB565 frames to the display thread.
/// `decode_tx` sends raw H.264 NAL data to the long-lived decode+display thread.
/// `touch_rx` receives touch events from the touch polling thread.
pub fn run_session<S: Read + Write>(
stream: &mut S,
config: &HeadUnitConfig,
nav_tx: &mpsc::Sender<NavEvent>,
video_tx: &mpsc::SyncSender<VideoFrame>,
decode_tx: &mpsc::SyncSender<Vec<u8>>,
touch_rx: &mpsc::Receiver<TouchEvent>,
) -> Result<()> {
#[cfg(feature = "nav-only")]
let _ = &decode_tx; // suppress unused warning in nav-only mode
let ch = ChannelMap::default();
let mut tls = TlsState::new();
let mut reader = FrameReader::new();
@@ -129,19 +76,6 @@ pub fn run_session<S: Read + Write>(
// Build channel descriptors for service discovery
let channel_descs = build_channel_descriptors(&ch);
// Dual-core H.264 decode pipeline:
// Session thread feeds raw H.264 chunks via `decode_tx`.
// Decoder thread (pinned to core 1) decodes and sends RGB565 via `video_tx`.
let (decode_tx, decode_rx) = mpsc::sync_channel::<Vec<u8>>(2);
let video_tx_dec = video_tx.clone();
let _decoder_thread = thread::Builder::new()
.name("h264-decode".into())
.stack_size(16384)
.spawn(move || {
decoder_loop(decode_rx, video_tx_dec);
})
.context("spawning decoder thread")?;
// Step 1: Send version request
log::info!("Sending version request");
let version_frame = control::version_request_frame();
@@ -163,9 +97,9 @@ pub fn run_session<S: Read + Write>(
let is_control_bit = frame.header.frame.get_control();
let is_encrypted = frame.header.frame.get_encryption();
// Yield every 50 iterations so IDLE0 can run (prevents task WDT)
// Yield every 10 iterations so IDLE0 can run (prevents task WDT)
loop_count += 1;
if loop_count % 50 == 0 {
if loop_count % 10 == 0 {
std::thread::yield_now();
}
@@ -175,8 +109,8 @@ pub fn run_session<S: Read + Write>(
send_touch_event(stream, &mut tls, ch.input, te, &mut touch_pressed, &mut touch_event_count)?;
}
// Log every incoming frame for debugging
log::info!(
// Log incoming frames at debug level (very high volume)
log::debug!(
"⬅️ ch={} ctrl={} enc={} len={} data={:02x?}",
channel_id, is_control_bit, is_encrypted,
frame.data.len(),
@@ -299,15 +233,21 @@ pub fn run_session<S: Read + Write>(
continue;
}
// Video channel — forward raw H.264 data to the decode thread
// Video channel
if channel_id == ch.video {
if let Ok(av) = channels::parse_av_frame(&frame) {
match av {
channels::AvMessage::SetupRequest { .. } => {
// Always accept setup — phone requires it for session.
log::info!("Video setup request");
let resp = channels::video_setup_response_frame(channel_id);
frame::write_frame(stream, &resp, &mut tls)?;
// VideoFocusIndication is sent only after VideoFocusRequest
// Proactively tell the phone we're ready for video.
// The phone won't send StartIndication until the HU
// sends an unsolicited VideoFocusIndication.
let kick = channels::video_focus_frame_unrequested(channel_id, true);
frame::write_frame(stream, &kick, &mut tls)?;
log::info!("Sent unsolicited VideoFocusIndication (FOCUSED)");
}
channels::AvMessage::StartIndication { session, .. } => {
log::info!("Video start (session={})", session);
@@ -321,21 +261,29 @@ pub fn run_session<S: Read + Write>(
channels::AvMessage::MediaData { data, .. } => {
video_ack_counter += 1;
// Forward to decode thread (non-blocking, drop if full)
let _ = decode_tx.try_send(data);
// Forward to decode thread (video mode) or discard (nav-only)
#[cfg(not(feature = "nav-only"))]
{ let _ = decode_tx.try_send(data); }
#[cfg(feature = "nav-only")]
drop(data);
// Ack every frame so phone keeps sending
// Ack so phone doesn't stall — non-fatal on error
if let Some(session) = video_session {
let ack = channels::media_ack_frame(
channel_id,
session,
video_ack_counter,
);
frame::write_frame(stream, &ack, &mut tls)?;
if let Err(e) = frame::write_frame(stream, &ack, &mut tls) {
log::warn!("Video ack write failed: {:?}", e);
}
}
}
channels::AvMessage::VideoFocusRequest { focused } => {
log::info!("Video focus request: {}", focused);
// Always respond FOCUSED — phone won't send nav data
// unless video is active. In nav-only we just drop the
// H.264 frames (acks are non-fatal).
let focus = channels::video_focus_frame(channel_id, true);
frame::write_frame(stream, &focus, &mut tls)?;
}
@@ -454,6 +402,8 @@ fn build_channel_descriptors(ch: &ChannelMap) -> Vec<Wifi::ChannelDescriptor> {
// ch.control = 0 (no descriptor needed, implicit)
channels::build_input_channel_descriptor(ch.input),
channels::build_sensor_channel_descriptor(ch.sensor),
// Always advertise video — phone requires it for the session to stay alive.
// In nav-only mode we reject setup with FAIL and respond UNFOCUSED.
channels::build_video_channel_descriptor(ch.video),
channels::build_media_audio_channel_descriptor(ch.media_audio),
channels::build_speech_audio_channel_descriptor(ch.speech_audio),
@@ -481,15 +431,15 @@ fn send_touch_event<S: Read + Write>(
touch_pressed: &mut bool,
count: &mut u64,
) -> Result<()> {
// FT6336U reports in portrait (native panel) coords: x ∈ [0,319], y ∈ [0,479].
// Display is landscape via MADCTL MV|MY (swap_xy + mirror_y):
// landscape_x = 479 - touch_y (raw_y=0 → right edge, raw_y=479 → left edge)
// landscape_y = touch_x (raw_x=0 → top, raw_x=319 → bottom)
// Then scale landscape coords to AA video source (800×480).
let disp_x = 479u32.saturating_sub(te.y as u32); // 0..479
let disp_y = te.x as u32; // 0..319
let aa_x = disp_x * 800 / DISPLAY_WIDTH;
let aa_y = disp_y * 480 / DISPLAY_HEIGHT;
// FT6336U on WT32-SC01 Plus with MADCTL MV|MY (landscape):
// raw_x: 0..319 maps to display Y (top→bottom)
// raw_y: 0..479 maps to display X (right→left, inverted)
// Verified empirically:
// bottom-right raw(273,11) → AA(780,409) ✓
// bottom-left raw(291,446) → AA(55,436) ✓
// top-left raw(16,453) → AA(43,24) ✓
let aa_x = (479u32.saturating_sub(te.y as u32)) * 800 / 480;
let aa_y = (te.x as u32) * 480 / 320;
let action = if te.pressed {
if *touch_pressed {
@@ -534,84 +484,3 @@ fn send_touch_event<S: Read + Write>(
frame::write_frame(stream, &frame, tls)?;
Ok(())
}
/// H.264 decode loop — runs on a dedicated thread (core 1 on ESP32-S3).
///
/// Receives raw H.264 NAL chunks from the session thread, decodes them,
/// and sends RGB565 frames to the display thread. This offloads the
/// heavy SW decode from the session/protocol thread (core 0).
///
/// IMPORTANT: We must feed EVERY NAL unit to the decoder — H.264 is a
/// stateful codec where P-frames reference the previous decoded frame.
/// Skipping NALs corrupts the decoder state and causes visual artifacts.
/// Frame dropping happens AFTER decode, at the output stage.
fn decoder_loop(
    decode_rx: mpsc::Receiver<Vec<u8>>,
    video_tx: mpsc::SyncSender<VideoFrame>,
) {
    let mut dec: Option<H264Decoder> = None;
    let mut chunks_seen: u32 = 0;
    let mut frames_dropped: u32 = 0;
    // Run until the session side hangs up the channel.
    while let Ok(chunk) = decode_rx.recv() {
        // Lazy init: construct the decoder when the first chunk arrives.
        if dec.is_none() {
            match H264Decoder::new(DecoderConfig::default()) {
                Ok(d) => {
                    log::info!("🎬 H.264 decoder initialized on decode thread");
                    dec = Some(d);
                }
                Err(e) => {
                    log::error!("Failed to init H.264 decoder: {:?}", e);
                    continue;
                }
            }
        }
        let decoder = dec.as_mut().unwrap();
        chunks_seen += 1;
        if chunks_seen % 30 == 1 {
            log::info!(
                "📹 Video data: frame #{}, chunk {} bytes, total NAL {}",
                chunks_seen, chunk.len(), decoder.nal_len()
            );
        }
        // Capture the frame number before decoding advances the counter.
        let next_frame = decoder.frames_decoded() + 1;
        match decoder.decode(&chunk) {
            Ok(Some(rgb565)) => match VideoFrame::new(next_frame) {
                Ok(mut vf) => {
                    vf.copy_from(rgb565);
                    // Non-blocking send: dropping a late frame beats
                    // stalling the decode pipeline behind the display.
                    match video_tx.try_send(vf) {
                        Ok(()) => {}
                        Err(mpsc::TrySendError::Full(_)) => {
                            frames_dropped += 1;
                            if frames_dropped % 30 == 1 {
                                log::info!("⏩ Dropped {} decoded frames (display busy)", frames_dropped);
                            }
                        }
                        Err(mpsc::TrySendError::Disconnected(_)) => return,
                    }
                }
                Err(e) => log::warn!("Frame alloc failed: {:?}", e),
            },
            Ok(None) => {} // header-only NAL, no picture yet
            Err(e) => {
                log::warn!("H.264 decode error: {:?}", e);
            }
        }
    }
    log::info!("🎬 Decode thread: channel closed, exiting");
}
+1
View File
@@ -0,0 +1 @@
HELLO