crop mode and nav mode
This commit is contained in:
@@ -12,6 +12,9 @@ default = []
|
||||
# Build in nav-only mode: no H.264 video decode, only turn-by-turn text.
|
||||
# Saves ~300KB+ PSRAM and significant CPU. No esp_h264 component needed.
|
||||
nav-only = ["dep:miniz_oxide"]
|
||||
# Crop video: take center 480×320 from 800×480 instead of downscaling.
|
||||
# Eliminates bilinear scaling — 1:1 pixel copy is ~40% faster.
|
||||
crop-video = []
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
|
||||
@@ -9,6 +9,7 @@ BINARY_NAME="esp32-android-auto-nav"
|
||||
# Parse arguments
|
||||
BUILD_ONLY=false
|
||||
NAV_ONLY=false
|
||||
CROP_VIDEO=false
|
||||
CARGO_FEATURES=""
|
||||
|
||||
for arg in "$@"; do
|
||||
@@ -21,22 +22,36 @@ for arg in "$@"; do
|
||||
NAV_ONLY=true
|
||||
shift
|
||||
;;
|
||||
-c|--crop)
|
||||
CROP_VIDEO=true
|
||||
shift
|
||||
;;
|
||||
-h|--help)
|
||||
echo "Usage: ./build.sh [OPTIONS]"
|
||||
echo "Options:"
|
||||
echo " -b, --build-only, --no-flash Build only, skip flashing prompt"
|
||||
echo " -n, --nav-only Nav-only mode: text turn-by-turn, no video"
|
||||
echo " -c, --crop Crop mode: center-crop 480×320 from 800×480 (faster)"
|
||||
echo " -h, --help Show this help message"
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Build feature list
|
||||
FEATURE_LIST=""
|
||||
if [ "$NAV_ONLY" = true ]; then
|
||||
CARGO_FEATURES="--features nav-only"
|
||||
FEATURE_LIST="nav-only"
|
||||
echo "📍 Mode: NAV-ONLY (turn-by-turn text, no H.264 video)"
|
||||
elif [ "$CROP_VIDEO" = true ]; then
|
||||
FEATURE_LIST="crop-video"
|
||||
echo "🎬 Mode: CROP VIDEO (center 480×320 from 800×480, no scaling)"
|
||||
else
|
||||
echo "🎬 Mode: FULL VIDEO (H.264 decode + display)"
|
||||
echo "🎬 Mode: FULL VIDEO (H.264 decode + downscale + display)"
|
||||
fi
|
||||
|
||||
if [ -n "$FEATURE_LIST" ]; then
|
||||
CARGO_FEATURES="--features $FEATURE_LIST"
|
||||
fi
|
||||
|
||||
echo "🔨 Building $BINARY_NAME (release)..."
|
||||
|
||||
+13
-31
@@ -16,15 +16,9 @@ CONFIG_SPIRAM=y
|
||||
CONFIG_SPIRAM_MODE_QUAD=y
|
||||
CONFIG_SPIRAM_SPEED_80M=y
|
||||
CONFIG_SPIRAM_USE_MALLOC=y
|
||||
# Allocations <= 4KB go to internal DRAM, larger ones to PSRAM.
|
||||
# The new strip-by-strip pipeline eliminates the 300KB PSRAM VideoFrame —
|
||||
# only the esp_h264 decoder's internal I420 buffers (~576KB) live in PSRAM.
|
||||
CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=4096
|
||||
# Reserve internal memory for DMA buffers (76.8KB) + ESP-IDF critical allocations
|
||||
CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=32768
|
||||
# Allow thread stacks in PSRAM (decode+display thread)
|
||||
CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY=y
|
||||
# Skip PSRAM memtest on boot (saves ~500ms startup)
|
||||
CONFIG_SPIRAM_MEMTEST=n
|
||||
|
||||
# Data Cache — maximize for PSRAM performance (H.264 decode reads PSRAM constantly)
|
||||
@@ -42,10 +36,8 @@ CONFIG_ESPTOOLPY_FLASHFREQ_80M=y
|
||||
# LCD I80 bus — use PLL clock source for stable 40MHz pixel clock
|
||||
CONFIG_LCD_PERIPH_CLK_SRC_PLL160M=y
|
||||
|
||||
# Bluetooth — BLE only (ESP32-S3 does NOT support Bluetooth Classic)
|
||||
CONFIG_BT_ENABLED=y
|
||||
CONFIG_BT_BLE_ENABLED=y
|
||||
CONFIG_BT_NIMBLE_ENABLED=y
|
||||
# Bluetooth — DISABLED to save ~20KB DRAM
|
||||
CONFIG_BT_ENABLED=n
|
||||
|
||||
# WiFi — minimize internal SRAM usage (leave room for DMA buffers)
|
||||
CONFIG_ESP_WIFI_ENABLED=y
|
||||
@@ -58,34 +50,28 @@ CONFIG_ESP_WIFI_RX_BA_WIN=4
|
||||
CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP=y
|
||||
|
||||
# H.264 software decoder (esp_h264 component)
|
||||
# Dual-task decoder for better FPS on ESP32-S3
|
||||
CONFIG_ESP_H264_DECODER_IRAM=1
|
||||
CONFIG_ESP_H264_DUAL_TASK=1
|
||||
CONFIG_ESP_H264_DECODER_IRAM=y
|
||||
CONFIG_ESP_H264_DUAL_TASK=y
|
||||
CONFIG_ESP_H264_DUAL_TASK_CORE=1
|
||||
CONFIG_ESP_H264_DUAL_TASK_PRIORITY=19
|
||||
|
||||
# TLS — mbedtls for Android Auto TLS handshake
|
||||
CONFIG_MBEDTLS_TLS_CLIENT=y
|
||||
CONFIG_MBEDTLS_TLS_SERVER=y
|
||||
CONFIG_MBEDTLS_SSL_ALPN=y
|
||||
# Disable cert bundle — we only use our own AA cert, and server verify is NONE.
|
||||
# The full bundle wastes ~60KB of heap when parsed.
|
||||
CONFIG_MBEDTLS_CERTIFICATE_BUNDLE=n
|
||||
CONFIG_MBEDTLS_HARDWARE_AES=y
|
||||
CONFIG_MBEDTLS_HARDWARE_SHA=y
|
||||
CONFIG_MBEDTLS_KEY_EXCHANGE_RSA=y
|
||||
# Use default allocator (not internal-only) — RSA MPI needs >32KB of temp buffers
|
||||
# and 64KB dcache + 32KB icache + 77KB DMA staging exhaust internal DRAM.
|
||||
# PSRAM is fine for one-time handshake; AES-GCM encrypt/decrypt uses HW accel.
|
||||
CONFIG_MBEDTLS_DEFAULT_MEM_ALLOC=y
|
||||
|
||||
# TCP/IP — larger window for video streaming throughput
|
||||
# 32KB window + 32KB send buffer reduces TCP stalls when phone sends
|
||||
# bursty H.264 data. LWIP buffers go to PSRAM (SPIRAM_TRY_ALLOCATE).
|
||||
# TCP/IP — 16KB windows for video streaming
|
||||
CONFIG_LWIP_MAX_SOCKETS=10
|
||||
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=32768
|
||||
CONFIG_LWIP_TCP_WND_DEFAULT=32768
|
||||
CONFIG_LWIP_TCP_RECVMBOX_SIZE=32
|
||||
CONFIG_LWIP_TCP_SND_BUF_DEFAULT=16384
|
||||
CONFIG_LWIP_TCP_WND_DEFAULT=16384
|
||||
CONFIG_LWIP_TCP_RECVMBOX_SIZE=16
|
||||
|
||||
# Logging — disable dynamic level checks (~10× faster log calls)
|
||||
# Logging — disable dynamic level checks
|
||||
CONFIG_LOG_DEFAULT_LEVEL_INFO=y
|
||||
CONFIG_LOG_DYNAMIC_LEVEL_CONTROL=n
|
||||
CONFIG_LOG_TAG_LEVEL_IMPL_NONE=y
|
||||
@@ -93,10 +79,6 @@ CONFIG_LOG_TAG_LEVEL_IMPL_NONE=y
|
||||
# FreeRTOS — 1ms ticks for responsive scheduling
|
||||
CONFIG_FREERTOS_HZ=1000
|
||||
|
||||
# Task watchdog — 10s for heavy decode workload
|
||||
CONFIG_ESP_TASK_WDT_TIMEOUT_S=10
|
||||
# Disable interrupt WDT on Core 1 — long DMA waits during video decode are normal
|
||||
# Task watchdog — 15s for heavy decode workload
|
||||
CONFIG_ESP_TASK_WDT_TIMEOUT_S=15
|
||||
CONFIG_ESP_INT_WDT_CHECK_CPU1=n
|
||||
|
||||
# mDNS — for Android Auto service discovery (_androidauto._tcp)
|
||||
CONFIG_MDNS_MAX_SERVICES=4
|
||||
|
||||
+120
-4
@@ -522,19 +522,48 @@ pub fn i420_to_rgb565_strip(
|
||||
let v_row = unsafe { v_plane.add(uv_row_off) };
|
||||
let out_off = dy_local * dst_w_us;
|
||||
|
||||
for dx in 0..dst_w_us {
|
||||
// Process 2 pixels per iteration — adjacent pixels often share
|
||||
// the same UV values, and this halves loop overhead.
|
||||
let mut dx = 0usize;
|
||||
let dst_w_pairs = dst_w_us & !1; // round down to even
|
||||
while dx < dst_w_pairs {
|
||||
unsafe {
|
||||
let src_x0 = *x_map.get_unchecked(dx) as usize;
|
||||
let src_x1 = *x_map.get_unchecked(dx + 1) as usize;
|
||||
let uv_x0 = src_x0 >> 1;
|
||||
let uv_x1 = src_x1 >> 1;
|
||||
|
||||
// Pixel 0
|
||||
let y_val0 = *lut.y_r.get_unchecked(*y_row.add(src_x0) as usize);
|
||||
let u_idx0 = *u_row.add(uv_x0) as usize;
|
||||
let v_idx0 = *v_row.add(uv_x0) as usize;
|
||||
let r0 = clamp8(y_val0 + *lut.v_r.get_unchecked(v_idx0));
|
||||
let g0 = clamp8(y_val0 - *lut.v_g.get_unchecked(v_idx0) - *lut.u_g.get_unchecked(u_idx0));
|
||||
let b0 = clamp8(y_val0 + *lut.u_b.get_unchecked(u_idx0));
|
||||
*out.get_unchecked_mut(out_off + dx) = ((r0 >> 3) << 11) | ((g0 >> 2) << 5) | (b0 >> 3);
|
||||
|
||||
// Pixel 1
|
||||
let y_val1 = *lut.y_r.get_unchecked(*y_row.add(src_x1) as usize);
|
||||
let u_idx1 = *u_row.add(uv_x1) as usize;
|
||||
let v_idx1 = *v_row.add(uv_x1) as usize;
|
||||
let r1 = clamp8(y_val1 + *lut.v_r.get_unchecked(v_idx1));
|
||||
let g1 = clamp8(y_val1 - *lut.v_g.get_unchecked(v_idx1) - *lut.u_g.get_unchecked(u_idx1));
|
||||
let b1 = clamp8(y_val1 + *lut.u_b.get_unchecked(u_idx1));
|
||||
*out.get_unchecked_mut(out_off + dx + 1) = ((r1 >> 3) << 11) | ((g1 >> 2) << 5) | (b1 >> 3);
|
||||
}
|
||||
dx += 2;
|
||||
}
|
||||
// Handle odd last pixel if display width is odd
|
||||
if dx < dst_w_us {
|
||||
unsafe {
|
||||
let src_x = *x_map.get_unchecked(dx) as usize;
|
||||
let uv_x = src_x >> 1;
|
||||
|
||||
let y_val = *lut.y_r.get_unchecked(*y_row.add(src_x) as usize);
|
||||
let u_idx = *u_row.add(uv_x) as usize;
|
||||
let v_idx = *v_row.add(uv_x) as usize;
|
||||
|
||||
let r = clamp8(y_val + *lut.v_r.get_unchecked(v_idx));
|
||||
let g = clamp8(y_val - *lut.v_g.get_unchecked(v_idx) - *lut.u_g.get_unchecked(u_idx));
|
||||
let b = clamp8(y_val + *lut.u_b.get_unchecked(u_idx));
|
||||
|
||||
*out.get_unchecked_mut(out_off + dx) = ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3);
|
||||
}
|
||||
}
|
||||
@@ -606,6 +635,93 @@ pub fn i420_to_rgb565_bilinear(
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a horizontal strip of I420 to RGB565 using center-crop (no scaling).
///
/// Extracts the centered `dst_w × dst_h` window of the `src_w × src_h` frame and
/// converts each source pixel 1:1 to RGB565. There is no interpolation and no
/// `x_map` lookup — only the table-driven YUV→RGB conversion.
///
/// * `i420` — planar frame: the Y plane (`src_w * src_h` bytes) followed by the
///   U plane and then the V plane, each subsampled 2× horizontally and vertically.
/// * `dst_y_start` / `strip_h` — which output rows to produce (for strip-based
///   DMA). Rows at or past `dst_h` are not produced.
/// * `out` — staging buffer, must hold `dst_w * strip_h` u16 entries. Rows are
///   written packed from index 0.
///
/// If the geometry is inconsistent — the crop window is larger than the source,
/// `i420` is too short for the stated source size, or `out` cannot hold the
/// requested rows — the function returns without writing anything rather than
/// reading or writing out of bounds.
#[cfg(feature = "crop-video")]
pub fn i420_to_rgb565_strip_crop(
    i420: &[u8],
    src_w: u32,
    src_h: u32,
    dst_w: u32,
    dst_h: u32,
    dst_y_start: u32,
    strip_h: u32,
    out: &mut [u16],
) {
    let src_w_us = src_w as usize;
    let src_h_us = src_h as usize;
    let dst_w_us = dst_w as usize;
    let dst_h_us = dst_h as usize;
    let first_row = dst_y_start as usize;

    // Rows actually produced: the requested strip clipped to the bottom edge.
    let rows = (strip_h as usize).min(dst_h_us.saturating_sub(first_row));
    if rows == 0 || dst_w_us == 0 {
        return;
    }

    // The crop window must fit inside the source, otherwise the centering
    // offsets below would underflow.
    if dst_w_us > src_w_us || dst_h_us > src_h_us {
        return;
    }

    let src_pixels = match src_w_us.checked_mul(src_h_us) {
        Some(n) => n,
        None => return,
    };
    let u_off = src_pixels;
    let v_off = src_pixels + src_pixels / 4;
    let uv_stride = src_w_us / 2;

    // Farthest byte any row/column of the source can touch is in the V plane.
    // Checking it once up front covers every Y, U and V read made below.
    let max_uv = ((src_h_us - 1) / 2) * uv_stride + (src_w_us - 1) / 2;
    match v_off.checked_add(max_uv) {
        Some(last) if last < i420.len() => {}
        _ => return,
    }

    // Every produced row is written in full, packed from the start of `out`.
    match rows.checked_mul(dst_w_us) {
        Some(needed) if needed <= out.len() => {}
        _ => return,
    }

    // Crop offsets: center the dst region within the src frame.
    let crop_x = (src_w_us - dst_w_us) / 2;
    let crop_y = (src_h_us - dst_h_us) / 2;

    let lut = get_yuv_lut();
    let to_rgb565 = |y: u8, u: u8, v: u8| -> u16 {
        // SAFETY: every table is indexed by a value derived from a `u8`
        // (0..=255). This assumes each LUT table has at least 256 entries, as
        // the other converters in this file do — TODO confirm against the
        // table definition.
        unsafe {
            let y_val = *lut.y_r.get_unchecked(y as usize);
            let u_idx = u as usize;
            let v_idx = v as usize;
            let r = clamp8(y_val + *lut.v_r.get_unchecked(v_idx));
            let g = clamp8(y_val - *lut.v_g.get_unchecked(v_idx) - *lut.u_g.get_unchecked(u_idx));
            let b = clamp8(y_val + *lut.u_b.get_unchecked(u_idx));
            ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3)
        }
    };

    // Two pixels per iteration; a trailing odd column is handled afterwards.
    let paired_w = dst_w_us & !1;

    for dy_local in 0..rows {
        let src_y = crop_y + first_row + dy_local;
        let y_base = src_y * src_w_us + crop_x;
        let u_base = u_off + (src_y / 2) * uv_stride;
        let v_base = v_off + (src_y / 2) * uv_stride;
        let out_off = dy_local * dst_w_us;

        let mut dx = 0usize;
        while dx < paired_w {
            // Chroma column comes from the absolute source column, so an odd
            // crop offset still picks the sample that covers the pixel.
            let c0 = (crop_x + dx) >> 1;
            let c1 = (crop_x + dx + 1) >> 1;
            // SAFETY: src_y < src_h and crop_x + dx + 1 < src_w, so all source
            // indices are <= v_off + max_uv, which was checked against
            // `i420.len()`. out_off + dx + 1 < rows * dst_w <= out.len().
            unsafe {
                *out.get_unchecked_mut(out_off + dx) = to_rgb565(
                    *i420.get_unchecked(y_base + dx),
                    *i420.get_unchecked(u_base + c0),
                    *i420.get_unchecked(v_base + c0),
                );
                *out.get_unchecked_mut(out_off + dx + 1) = to_rgb565(
                    *i420.get_unchecked(y_base + dx + 1),
                    *i420.get_unchecked(u_base + c1),
                    *i420.get_unchecked(v_base + c1),
                );
            }
            dx += 2;
        }

        // Last column when the output width is odd.
        if dx < dst_w_us {
            let c = (crop_x + dx) >> 1;
            // SAFETY: same bounds argument as the paired loop above, with
            // dx == dst_w - 1.
            unsafe {
                *out.get_unchecked_mut(out_off + dx) = to_rgb565(
                    *i420.get_unchecked(y_base + dx),
                    *i420.get_unchecked(u_base + c),
                    *i420.get_unchecked(v_base + c),
                );
            }
        }
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
+148
-27
@@ -55,8 +55,10 @@ fn main() -> Result<()> {
|
||||
log::info!("=== ESP32 Android Auto Navigation Head Unit ===");
|
||||
#[cfg(feature = "nav-only")]
|
||||
log::info!("Mode: NAV-ONLY (turn-by-turn text, no video decode)");
|
||||
#[cfg(not(feature = "nav-only"))]
|
||||
log::info!("Mode: FULL VIDEO (H.264 decode + display)");
|
||||
#[cfg(all(not(feature = "nav-only"), feature = "crop-video"))]
|
||||
log::info!("Mode: CROP VIDEO (center 480×320 from 800×480, no scaling)");
|
||||
#[cfg(all(not(feature = "nav-only"), not(feature = "crop-video")))]
|
||||
log::info!("Mode: FULL VIDEO (H.264 decode + downscale + display)");
|
||||
|
||||
// Check PSRAM availability
|
||||
let free_psram = unsafe {
|
||||
@@ -133,7 +135,7 @@ fn main() -> Result<()> {
|
||||
#[cfg(not(feature = "nav-only"))]
|
||||
let decode_tx = {
|
||||
// Channel for raw H.264 NAL data → decode+display thread.
|
||||
let (decode_tx, decode_rx) = mpsc::sync_channel::<Vec<u8>>(2);
|
||||
let (decode_tx, decode_rx) = mpsc::sync_channel::<Vec<u8>>(4);
|
||||
|
||||
// Spawn navigation UI thread (log-only in video mode — LCD is owned by video)
|
||||
let _ui_thread = thread::Builder::new()
|
||||
@@ -862,11 +864,78 @@ fn png_unfilter(data: &mut [u8], width: usize, height: usize, channels: usize) {
|
||||
///
|
||||
/// Only compiled in video mode (not nav-only).
|
||||
#[cfg(not(feature = "nav-only"))]
|
||||
/// Work item sent to the converter helper thread.
///
/// Describes one strip conversion: where the shared I420 input lives, the
/// source/destination geometry, which output rows to produce, and where the
/// RGB565 result must be written. The buffers are referenced by raw pointer,
/// so whoever builds a `ConvertWork` must keep both buffers alive and must not
/// touch the output range until the worker has signalled completion.
struct ConvertWork {
    /// Start of the I420 input frame.
    i420_ptr: *const u8,
    /// Length of the I420 input in bytes.
    i420_len: usize,
    /// Source frame width in pixels.
    src_w: u32,
    /// Source frame height in pixels.
    src_h: u32,
    /// Output width in pixels.
    dst_w: u32,
    /// Output height in pixels.
    dst_h: u32,
    /// First output row this job produces.
    dst_y_start: u32,
    /// Number of output rows this job produces.
    strip_h: u32,
    /// Start of the output region for this job.
    out_ptr: *mut u16,
    /// Length of the output region in `u16` entries.
    out_len: usize,
}

// SAFETY: Pointers are valid for the duration of the work item.
// The main thread waits for `done_rx` before touching the buffers again.
//
// Gated like the struct itself (which is only compiled when `nav-only` is
// off), so a nav-only build does not reference a type that does not exist.
#[cfg(not(feature = "nav-only"))]
unsafe impl Send for ConvertWork {}
|
||||
|
||||
/// Converter worker thread — sits on core opposite to the decode thread.
|
||||
/// Receives half-strip conversion jobs and signals completion.
|
||||
fn converter_worker(
|
||||
rx: mpsc::Receiver<ConvertWork>,
|
||||
done_tx: mpsc::SyncSender<()>,
|
||||
) {
|
||||
loop {
|
||||
let work = match rx.recv() {
|
||||
Ok(w) => w,
|
||||
Err(_) => return,
|
||||
};
|
||||
unsafe {
|
||||
let i420 = core::slice::from_raw_parts(work.i420_ptr, work.i420_len);
|
||||
let out = core::slice::from_raw_parts_mut(work.out_ptr, work.out_len);
|
||||
#[cfg(feature = "crop-video")]
|
||||
decoder::i420_to_rgb565_strip_crop(
|
||||
i420,
|
||||
work.src_w, work.src_h,
|
||||
work.dst_w, work.dst_h,
|
||||
work.dst_y_start, work.strip_h,
|
||||
out,
|
||||
);
|
||||
#[cfg(not(feature = "crop-video"))]
|
||||
decoder::i420_to_rgb565_strip(
|
||||
i420,
|
||||
work.src_w, work.src_h,
|
||||
work.dst_w, work.dst_h,
|
||||
work.dst_y_start, work.strip_h,
|
||||
out,
|
||||
);
|
||||
}
|
||||
let _ = done_tx.send(());
|
||||
}
|
||||
}
|
||||
|
||||
fn decode_display_loop(decode_rx: mpsc::Receiver<Vec<u8>>, lcd: display::Display) {
|
||||
log::info!("Decode+display thread started (strip-by-strip direct-to-DMA)");
|
||||
log::info!("Decode+display thread started (display every frame)");
|
||||
|
||||
// Non-crop mode: spawn converter helper for dual-core strip splitting
|
||||
#[cfg(not(feature = "crop-video"))]
|
||||
let (work_tx, work_rx) = mpsc::sync_channel::<ConvertWork>(1);
|
||||
#[cfg(not(feature = "crop-video"))]
|
||||
let (done_tx, done_rx) = mpsc::sync_channel::<()>(1);
|
||||
#[cfg(not(feature = "crop-video"))]
|
||||
let _converter = thread::Builder::new()
|
||||
.name("converter".into())
|
||||
.stack_size(4096)
|
||||
.spawn(move || converter_worker(work_rx, done_tx))
|
||||
.expect("converter thread");
|
||||
|
||||
let mut dec: Option<decoder::H264Decoder> = None;
|
||||
let mut frame_count: u64 = 0;
|
||||
let mut display_count: u64 = 0;
|
||||
let mut skip_count: u64 = 0;
|
||||
let strip_h: u32 = display::STRIP_LINES as u32;
|
||||
|
||||
@@ -880,16 +949,16 @@ fn decode_display_loop(decode_rx: mpsc::Receiver<Vec<u8>>, lcd: display::Display
|
||||
}
|
||||
};
|
||||
|
||||
// Drain all queued chunks: decode each one to maintain H.264 state,
|
||||
// but skip the expensive YUV→RGB565 conversion (discard mode).
|
||||
// Only the latest chunk will get full conversion + display.
|
||||
// Drain all queued chunks: discard older frames and keep only
|
||||
// the latest. We do NOT decode discarded frames — each
|
||||
// esp_h264_dec_process call takes ~300ms for 800×480, so
|
||||
// decoding throwaway frames was the #1 bottleneck (0.9 fps!).
|
||||
// P-frames may glitch briefly until the next keyframe, but
|
||||
// that's far better than wasting 1+ second per iteration.
|
||||
loop {
|
||||
match decode_rx.try_recv() {
|
||||
Ok(next) => {
|
||||
if let Some(d) = &mut dec {
|
||||
let _ = d.decode_into(&data, &mut []); // decode-only
|
||||
skip_count += 1;
|
||||
}
|
||||
skip_count += 1;
|
||||
data = next;
|
||||
}
|
||||
Err(_) => break,
|
||||
@@ -912,11 +981,14 @@ fn decode_display_loop(decode_rx: mpsc::Receiver<Vec<u8>>, lcd: display::Display
|
||||
let d = dec.as_mut().unwrap();
|
||||
|
||||
// Decode the latest NAL → get raw I420 pointer
|
||||
let t0 = unsafe { esp_idf_sys::esp_timer_get_time() };
|
||||
match d.decode_raw(&data) {
|
||||
Ok(Some((i420_ptr, i420_len))) => {
|
||||
let t1 = unsafe { esp_idf_sys::esp_timer_get_time() };
|
||||
frame_count += 1;
|
||||
|
||||
if frame_count % 60 == 1 {
|
||||
display_count += 1;
|
||||
if display_count % 30 == 1 {
|
||||
let free_psram = unsafe {
|
||||
esp_idf_sys::heap_caps_get_free_size(esp_idf_sys::MALLOC_CAP_SPIRAM)
|
||||
};
|
||||
@@ -924,36 +996,85 @@ fn decode_display_loop(decode_rx: mpsc::Receiver<Vec<u8>>, lcd: display::Display
|
||||
esp_idf_sys::heap_caps_get_free_size(esp_idf_sys::MALLOC_CAP_INTERNAL)
|
||||
};
|
||||
log::info!(
|
||||
"Frame #{} (skipped {}, PSRAM {}KB, DRAM {}KB free)",
|
||||
frame_count, skip_count, free_psram / 1024, free_dram / 1024,
|
||||
"Display #{} (decoded {}, skipped {}, PSRAM {}KB, DRAM {}KB free)",
|
||||
display_count, frame_count, skip_count, free_psram / 1024, free_dram / 1024,
|
||||
);
|
||||
}
|
||||
|
||||
// SAFETY: I420 data is component-owned, valid until next decode call.
|
||||
// We consume it fully here before the next loop iteration.
|
||||
let i420 = unsafe { core::slice::from_raw_parts(i420_ptr, i420_len) };
|
||||
|
||||
let src_w = d.source_width();
|
||||
let src_h = d.source_height();
|
||||
let (dst_w, dst_h) = d.output_dimensions();
|
||||
|
||||
// Strip-by-strip: convert I420→RGB565 directly into DMA staging
|
||||
// SRAM buffers and push to LCD. Alternating buffers overlap
|
||||
// DMA transfer with CPU conversion (double-buffered pipeline).
|
||||
// Strip-by-strip rendering.
|
||||
// Crop mode: single-threaded 1:1 copy (trivially fast, ~2ms/strip).
|
||||
// Scale mode: dual-core split (worker + main process half each).
|
||||
// Double-buffered DMA pipeline overlaps transfer with computation.
|
||||
let mut buf_idx: usize = 0;
|
||||
for y in (0..dst_h).step_by(strip_h as usize) {
|
||||
let h = strip_h.min(dst_h - y);
|
||||
let dma_buf = lcd.dma_stage_mut(buf_idx);
|
||||
|
||||
decoder::i420_to_rgb565_strip(
|
||||
i420,
|
||||
d.source_width(), d.source_height(),
|
||||
dst_w, dst_h,
|
||||
y, h,
|
||||
dma_buf,
|
||||
);
|
||||
#[cfg(feature = "crop-video")]
|
||||
{
|
||||
// Crop: simple 1:1 copy, no dual-core needed
|
||||
decoder::i420_to_rgb565_strip_crop(
|
||||
i420,
|
||||
src_w, src_h,
|
||||
dst_w, dst_h,
|
||||
y, h,
|
||||
&mut dma_buf[..(h * dst_w) as usize],
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "crop-video"))]
|
||||
{
|
||||
// Scale: split strip in half for dual-core
|
||||
let half_h = h / 2;
|
||||
let lower_h = h - half_h;
|
||||
|
||||
// Send top half to converter worker
|
||||
let _ = work_tx.send(ConvertWork {
|
||||
i420_ptr: i420.as_ptr(),
|
||||
i420_len: i420.len(),
|
||||
src_w, src_h,
|
||||
dst_w, dst_h,
|
||||
dst_y_start: y,
|
||||
strip_h: half_h,
|
||||
out_ptr: dma_buf.as_mut_ptr(),
|
||||
out_len: (half_h * dst_w) as usize,
|
||||
});
|
||||
|
||||
// Convert bottom half on this thread
|
||||
let lower_offset = (half_h * dst_w) as usize;
|
||||
decoder::i420_to_rgb565_strip(
|
||||
i420,
|
||||
src_w, src_h,
|
||||
dst_w, dst_h,
|
||||
y + half_h, lower_h,
|
||||
&mut dma_buf[lower_offset..lower_offset + (lower_h * dst_w) as usize],
|
||||
);
|
||||
|
||||
// Wait for worker to finish top half
|
||||
let _ = done_rx.recv();
|
||||
}
|
||||
|
||||
lcd.flush_strip(y as u16, h as u16, buf_idx);
|
||||
buf_idx ^= 1;
|
||||
}
|
||||
|
||||
let t2 = unsafe { esp_idf_sys::esp_timer_get_time() };
|
||||
if display_count % 30 == 1 {
|
||||
let decode_ms = (t1 - t0) / 1000;
|
||||
let render_ms = (t2 - t1) / 1000;
|
||||
log::info!("⏱ decode={}ms render={}ms total={}ms", decode_ms, render_ms, decode_ms + render_ms);
|
||||
}
|
||||
|
||||
// Yield to IDLE0 after each frame so the task watchdog
|
||||
// doesn't trigger (decode + convert can take 300ms+).
|
||||
unsafe { esp_idf_sys::vTaskDelay(1); }
|
||||
}
|
||||
Ok(None) => {} // SPS/PPS/SEI — no image data (normal at stream start)
|
||||
Err(e) => {
|
||||
@@ -963,7 +1084,7 @@ fn decode_display_loop(decode_rx: mpsc::Receiver<Vec<u8>>, lcd: display::Display
|
||||
}
|
||||
}
|
||||
|
||||
/// Touch polling loop — reads FT6336U at ~30Hz and sends events.
|
||||
/// Touch polling loop — reads FT6336U at ~60Hz and sends events.
|
||||
fn touch_poll_loop(mut touch: touch::Touch<'static>, tx: mpsc::Sender<touch::TouchEvent>) {
|
||||
log::info!("Touch polling thread started");
|
||||
loop {
|
||||
@@ -971,6 +1092,6 @@ fn touch_poll_loop(mut touch: touch::Touch<'static>, tx: mpsc::Sender<touch::Tou
|
||||
log::debug!("👆 Touch: ({}, {}) pressed={}", event.x, event.y, event.pressed);
|
||||
let _ = tx.send(event);
|
||||
}
|
||||
std::thread::sleep(Duration::from_millis(33)); // ~30Hz
|
||||
std::thread::sleep(Duration::from_millis(16)); // ~60Hz
|
||||
}
|
||||
}
|
||||
|
||||
+65
-15
@@ -8,7 +8,9 @@
|
||||
//! 4. Channel message dispatching (navigation, video stub, audio stub, etc.)
|
||||
|
||||
use std::io::{Read, Write};
|
||||
use std::net::TcpStream;
|
||||
use std::sync::mpsc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, Result, bail};
|
||||
use protobuf::{Enum, Message};
|
||||
@@ -60,8 +62,8 @@ impl Default for ChannelMap {
|
||||
/// `nav_tx` sends navigation events to the UI thread.
|
||||
/// `decode_tx` sends raw H.264 NAL data to the long-lived decode+display thread.
|
||||
/// `touch_rx` receives touch events from the touch polling thread.
|
||||
pub fn run_session<S: Read + Write>(
|
||||
stream: &mut S,
|
||||
pub fn run_session(
|
||||
stream: &mut TcpStream,
|
||||
config: &HeadUnitConfig,
|
||||
nav_tx: &mpsc::Sender<NavEvent>,
|
||||
decode_tx: &mpsc::SyncSender<Vec<u8>>,
|
||||
@@ -90,25 +92,62 @@ pub fn run_session<S: Read + Write>(
|
||||
let mut touch_pressed: bool = false;
|
||||
let mut touch_event_count: u64 = 0;
|
||||
let mut loop_count: u32 = 0;
|
||||
let mut last_focus_kick_us: u64 = 0;
|
||||
|
||||
// Set read timeout for peek polling — allows us to drain touch
|
||||
// events every ~50ms even when the phone isn't sending data.
|
||||
stream.set_read_timeout(Some(Duration::from_millis(50)))?;
|
||||
|
||||
loop {
|
||||
let frame = reader.read_frame(stream, &mut tls)?;
|
||||
let channel_id = frame.header.channel_id;
|
||||
let is_control_bit = frame.header.frame.get_control();
|
||||
let is_encrypted = frame.header.frame.get_encryption();
|
||||
|
||||
// Yield every 10 iterations so IDLE0 can run (prevents task WDT)
|
||||
loop_count += 1;
|
||||
if loop_count % 10 == 0 {
|
||||
std::thread::yield_now();
|
||||
}
|
||||
|
||||
// Drain any pending touch events every iteration (video arrives ~30fps,
|
||||
// so touch latency is at most ~33ms — good enough for interaction).
|
||||
// Drain any pending touch events BEFORE blocking on read.
|
||||
// This ensures touch events are sent promptly even when
|
||||
// the phone isn't sending data.
|
||||
let mut sent_touch = false;
|
||||
while let Ok(te) = touch_rx.try_recv() {
|
||||
send_touch_event(stream, &mut tls, ch.input, te, &mut touch_pressed, &mut touch_event_count)?;
|
||||
sent_touch = true;
|
||||
}
|
||||
|
||||
// If touch events were sent while phone may be idle, send
|
||||
// VideoFocusIndication to prompt it to render and send fresh frames.
|
||||
if sent_touch && video_session.is_some() {
|
||||
let now_us = unsafe { esp_idf_sys::esp_timer_get_time() } as u64;
|
||||
if now_us.wrapping_sub(last_focus_kick_us) > 500_000 {
|
||||
let kick = channels::video_focus_frame_unrequested(ch.video, true);
|
||||
if let Err(e) = frame::write_frame(stream, &kick, &mut tls) {
|
||||
log::warn!("Video focus kick failed: {:?}", e);
|
||||
}
|
||||
last_focus_kick_us = now_us;
|
||||
}
|
||||
}
|
||||
|
||||
// Peek to check if data is available (blocks up to 50ms).
|
||||
// peek() doesn't consume data, so partial-read corruption is impossible.
|
||||
let mut peek_buf = [0u8; 1];
|
||||
match stream.peek(&mut peek_buf) {
|
||||
Ok(_) => {} // Data available — proceed to read frame
|
||||
Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock
|
||||
|| e.kind() == std::io::ErrorKind::TimedOut => {
|
||||
continue; // No data — loop back to drain touch events
|
||||
}
|
||||
Err(e) => return Err(e).context("peeking socket for data"),
|
||||
}
|
||||
|
||||
// Data is on the socket — read the full frame with generous timeout
|
||||
stream.set_read_timeout(Some(Duration::from_secs(5)))?;
|
||||
let frame = reader.read_frame(stream, &mut tls)?;
|
||||
stream.set_read_timeout(Some(Duration::from_millis(50)))?;
|
||||
|
||||
let channel_id = frame.header.channel_id;
|
||||
let is_control_bit = frame.header.frame.get_control();
|
||||
let is_encrypted = frame.header.frame.get_encryption();
|
||||
|
||||
// Log incoming frames at debug level (very high volume)
|
||||
log::debug!(
|
||||
"⬅️ ch={} ctrl={} enc={} len={} data={:02x?}",
|
||||
@@ -434,12 +473,23 @@ fn send_touch_event<S: Read + Write>(
|
||||
// FT6336U on WT32-SC01 Plus with MADCTL MV|MY (landscape):
|
||||
// raw_x: 0..319 maps to display Y (top→bottom)
|
||||
// raw_y: 0..479 maps to display X (right→left, inverted)
|
||||
// Verified empirically:
|
||||
// bottom-right raw(273,11) → AA(780,409) ✓
|
||||
// bottom-left raw(291,446) → AA(55,436) ✓
|
||||
// top-left raw(16,453) → AA(43,24) ✓
|
||||
let aa_x = (479u32.saturating_sub(te.y as u32)) * 800 / 480;
|
||||
let aa_y = (te.x as u32) * 480 / 320;
|
||||
//
|
||||
// In crop mode, the display shows the center 480×320 of the 800×480 frame,
|
||||
// so we add the crop offset (160, 80) to map from display to AA coordinates.
|
||||
// In downscale mode, the display shows the full 800×480 scaled to 480×320.
|
||||
#[cfg(feature = "crop-video")]
|
||||
let (aa_x, aa_y) = {
|
||||
let disp_x = 479u32.saturating_sub(te.y as u32);
|
||||
let disp_y = te.x as u32;
|
||||
// Crop offset: (800-480)/2 = 160, (480-320)/2 = 80
|
||||
(disp_x + 160, disp_y + 80)
|
||||
};
|
||||
#[cfg(not(feature = "crop-video"))]
|
||||
let (aa_x, aa_y) = {
|
||||
let aa_x = (479u32.saturating_sub(te.y as u32)) * 800 / 480;
|
||||
let aa_y = (te.x as u32) * 480 / 320;
|
||||
(aa_x, aa_y)
|
||||
};
|
||||
|
||||
let action = if te.pressed {
|
||||
if *touch_pressed {
|
||||
|
||||
Reference in New Issue
Block a user