diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e5bf6d18..ab6003603 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - **`provision.py` esptool v5 compat** (#391) — Stale `write_flash` (underscore) syntax in the dry-run manual-flash hint now uses `write-flash` (hyphenated) for esptool >= 5.x. The primary flash command was already correct. - **`provision.py` silent NVS wipe** (#391) — The script replaces the entire `csi_cfg` NVS namespace on every run, so partial invocations were silently erasing WiFi credentials and causing `Retrying WiFi connection (10/10)` in the field. Now refuses to run without `--ssid`, `--password`, and `--target-ip` unless `--force-partial` is passed. `--force-partial` prints a warning listing which keys will be wiped. +- **Firmware: defensive `node_id` capture** (#232, #375, #385, #386, #390) — Users on multi-node deployments reported `node_id` reverting to the Kconfig default (`1`) in UDP frames and in the `csi_collector` init log, despite NVS loading the correct value. The root cause (memory corruption of `g_nvs_config`) has not been definitively isolated, but the UDP frame header is now tamper-proof: `csi_collector_init()` captures `g_nvs_config.node_id` into a module-local `s_node_id` once, and `csi_serialize_frame()` plus all other consumers (`edge_processing.c`, `wasm_runtime.c`, `display_ui.c`, `swarm_bridge_init`) read it via the new `csi_collector_get_node_id()` accessor. A canary logs `WARN` if `g_nvs_config.node_id` diverges from `s_node_id` at end-of-init, helping isolate the upstream corruption path. Validated on attached ESP32-S3 (COM8): NVS `node_id=2` propagates through boot log, capture log, init log, and byte[4] of every UDP frame. ### Docs - **CHANGELOG catch-up** (#367) — Added missing entries for v0.5.5, v0.6.0, and v0.7.0 releases. diff --git a/firmware/esp32-csi-node/main/csi_collector.c b/firmware/esp32-csi-node/main/csi_collector.c index e13fabcab..ba5745376 100644 --- a/firmware/esp32-csi-node/main/csi_collector.c +++ b/firmware/esp32-csi-node/main/csi_collector.c @@ -25,6 +25,14 @@ /* ADR-060: Access the global NVS config for MAC filter and channel override. */ extern nvs_config_t g_nvs_config; +/* Defensive fix (#232, #375, #385, #386, #390): capture node_id at init-time + * into a module-local static. Using the global g_nvs_config.node_id directly + * at every callback is vulnerable to any memory corruption that clobbers the + * struct (which users have reported reverting node_id to the Kconfig default + * of 1). The local copy is set once at csi_collector_init() and then used + * exclusively by csi_serialize_frame(). */ +static uint8_t s_node_id = 1; + /* ADR-057: Build-time guard — fail early if CSI is not enabled in sdkconfig. * Without this, the firmware compiles but crashes at runtime with: * "E (xxxx) wifi:CSI not enabled in menuconfig!" @@ -117,8 +125,9 @@ size_t csi_serialize_frame(const wifi_csi_info_t *info, uint8_t *buf, size_t buf uint32_t magic = CSI_MAGIC; memcpy(&buf[0], &magic, 4); - /* Node ID (from NVS runtime config, not compile-time Kconfig) */ - buf[4] = g_nvs_config.node_id; + /* Node ID (captured at init into s_node_id to survive memory corruption + * that could clobber g_nvs_config.node_id - see #232/#375/#385/#390). */ + buf[4] = s_node_id; /* Number of antennas */ buf[5] = n_antennas; @@ -215,6 +224,13 @@ static void wifi_promiscuous_cb(void *buf, wifi_promiscuous_pkt_type_t type) void csi_collector_init(void) { + /* Capture node_id into module-local static at init time. After this point + * csi_serialize_frame() uses s_node_id exclusively, isolating the UDP + * frame node_id field from any memory corruption of g_nvs_config. */ + s_node_id = g_nvs_config.node_id; + ESP_LOGI(TAG, "Captured node_id=%u at init (defensive copy for #232/#375/#385/#390)", + (unsigned)s_node_id); + /* ADR-060: Determine the CSI channel. * Priority: 1) NVS override (--channel), 2) connected AP channel, 3) Kconfig default. */ uint8_t csi_channel = (uint8_t)CONFIG_CSI_WIFI_CHANNEL; @@ -272,8 +288,24 @@ void csi_collector_init(void) g_nvs_config.filter_mac[4], g_nvs_config.filter_mac[5]); } - ESP_LOGI(TAG, "CSI collection initialized (node_id=%d, channel=%u)", - g_nvs_config.node_id, (unsigned)csi_channel); + ESP_LOGI(TAG, "CSI collection initialized (node_id=%u, channel=%u)", + (unsigned)s_node_id, (unsigned)csi_channel); + + /* Clobber-detection canary: if g_nvs_config.node_id no longer matches the + * value we captured, something corrupted the struct between nvs_config_load + * and here. This is the historic #232/#375 symptom. */ + if (g_nvs_config.node_id != s_node_id) { + ESP_LOGW(TAG, "node_id clobber detected: captured=%u but g_nvs_config=%u " + "(frames will use captured value %u). Please report to #390.", + (unsigned)s_node_id, (unsigned)g_nvs_config.node_id, + (unsigned)s_node_id); + } +} + +/* Accessor for other modules that need the authoritative runtime node_id. */ +uint8_t csi_collector_get_node_id(void) +{ + return s_node_id; } /* ---- ADR-029: Channel hopping ---- */ diff --git a/firmware/esp32-csi-node/main/csi_collector.h b/firmware/esp32-csi-node/main/csi_collector.h index d1fa51171..3bdfd1484 100644 --- a/firmware/esp32-csi-node/main/csi_collector.h +++ b/firmware/esp32-csi-node/main/csi_collector.h @@ -29,6 +29,18 @@ */ void csi_collector_init(void); +/** + * Get the runtime node_id captured at csi_collector_init(). + * + * This is a defensive copy of g_nvs_config.node_id taken at init time. Other + * modules (edge_processing, wasm_runtime, display_ui) should prefer this + * accessor over reading g_nvs_config.node_id directly, because the global + * struct can be clobbered by memory corruption (see #232, #375, #385, #390). + * + * @return Node ID (0-255) as loaded from NVS or Kconfig default at boot. + */ +uint8_t csi_collector_get_node_id(void); + /** * Serialize CSI data into ADR-018 binary frame format. * diff --git a/firmware/esp32-csi-node/main/display_ui.c b/firmware/esp32-csi-node/main/display_ui.c index 1ffd9e295..901867fbd 100644 --- a/firmware/esp32-csi-node/main/display_ui.c +++ b/firmware/esp32-csi-node/main/display_ui.c @@ -8,6 +8,7 @@ #include "display_ui.h" #include "nvs_config.h" +#include "csi_collector.h" /* csi_collector_get_node_id() - defensive #390 */ #include "sdkconfig.h" extern nvs_config_t g_nvs_config; @@ -350,7 +351,7 @@ void display_ui_update(void) { char buf[48]; - snprintf(buf, sizeof(buf), "Node: %d", g_nvs_config.node_id); + snprintf(buf, sizeof(buf), "Node: %u", (unsigned)csi_collector_get_node_id()); lv_label_set_text(s_sys_node, buf); snprintf(buf, sizeof(buf), "Heap: %lu KB free", diff --git a/firmware/esp32-csi-node/main/edge_processing.c b/firmware/esp32-csi-node/main/edge_processing.c index 3a5935406..ad5c87951 100644 --- a/firmware/esp32-csi-node/main/edge_processing.c +++ b/firmware/esp32-csi-node/main/edge_processing.c @@ -19,6 +19,7 @@ #include "edge_processing.h" #include "nvs_config.h" +#include "csi_collector.h" /* csi_collector_get_node_id() - defensive #390 */ #include "mmwave_sensor.h" /* Runtime config — declared in main.c, loaded from NVS at boot. */ @@ -441,7 +442,7 @@ static void send_compressed_frame(const uint8_t *iq_data, uint16_t iq_len, uint32_t magic = EDGE_COMPRESSED_MAGIC; memcpy(&pkt[0], &magic, 4); - pkt[4] = g_nvs_config.node_id; + pkt[4] = csi_collector_get_node_id(); /* #390: defensive copy */ pkt[5] = channel; memcpy(&pkt[6], &iq_len, 2); memcpy(&pkt[8], &comp_len, 2); @@ -557,7 +558,7 @@ static void send_vitals_packet(void) memset(&pkt, 0, sizeof(pkt)); pkt.magic = EDGE_VITALS_MAGIC; - pkt.node_id = g_nvs_config.node_id; + pkt.node_id = csi_collector_get_node_id(); /* #390: defensive copy */ pkt.flags = 0; if (s_presence_detected) pkt.flags |= 0x01; @@ -647,7 +648,7 @@ static void send_feature_vector(void) memset(&pkt, 0, sizeof(pkt)); pkt.magic = EDGE_FEATURE_MAGIC; - pkt.node_id = g_nvs_config.node_id; + pkt.node_id = csi_collector_get_node_id(); /* #390: defensive copy */ pkt.reserved = 0; pkt.seq = s_feature_seq++; pkt.timestamp_us = esp_timer_get_time(); diff --git a/firmware/esp32-csi-node/main/main.c b/firmware/esp32-csi-node/main/main.c index 6fc0b5e1e..631a0dbaf 100644 --- a/firmware/esp32-csi-node/main/main.c +++ b/firmware/esp32-csi-node/main/main.c @@ -267,7 +267,7 @@ void app_main(void) strncpy(swarm_cfg.seed_url, g_nvs_config.seed_url, sizeof(swarm_cfg.seed_url) - 1); strncpy(swarm_cfg.seed_token, g_nvs_config.seed_token, sizeof(swarm_cfg.seed_token) - 1); strncpy(swarm_cfg.zone_name, g_nvs_config.zone_name, sizeof(swarm_cfg.zone_name) - 1); - swarm_ret = swarm_bridge_init(&swarm_cfg, g_nvs_config.node_id); + swarm_ret = swarm_bridge_init(&swarm_cfg, csi_collector_get_node_id()); if (swarm_ret != ESP_OK) { ESP_LOGW(TAG, "Swarm bridge init failed: %s", esp_err_to_name(swarm_ret)); } diff --git a/firmware/esp32-csi-node/main/wasm_runtime.c b/firmware/esp32-csi-node/main/wasm_runtime.c index d63aaa494..8696be9fa 100644 --- a/firmware/esp32-csi-node/main/wasm_runtime.c +++ b/firmware/esp32-csi-node/main/wasm_runtime.c @@ -13,6 +13,7 @@ #include "sdkconfig.h" #include "wasm_runtime.h" #include "nvs_config.h" +#include "csi_collector.h" /* csi_collector_get_node_id() - defensive #390 */ extern nvs_config_t g_nvs_config; @@ -383,7 +384,7 @@ static void send_wasm_output(uint8_t slot_id) memset(&pkt, 0, sizeof(pkt)); pkt.magic = WASM_OUTPUT_MAGIC; - pkt.node_id = g_nvs_config.node_id; + pkt.node_id = csi_collector_get_node_id(); /* #390: defensive copy */ pkt.module_id = slot_id; pkt.event_count = n_filtered;