diff --git a/inc/sp140/bms.h b/inc/sp140/bms.h index fcde6e9a..ed5d2005 100644 --- a/inc/sp140/bms.h +++ b/inc/sp140/bms.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "sp140/structs.h" @@ -10,6 +11,39 @@ #define MCP_CS 5 // MCP2515 CS pin #define MCP_BAUDRATE 250000 +// BMS cell probe disconnect policy. +// The library returns NaN for disconnected probes. This app tolerates up to +// BMS_MAX_IGNORED_DISCONNECTED_PROBES disconnected; beyond that the NaN is +// replaced with the sentinel value so downstream temp monitors fire an alert. +constexpr uint8_t BMS_CELL_PROBE_COUNT = 4; +constexpr uint8_t BMS_MAX_IGNORED_DISCONNECTED_PROBES = 2; + +inline void sanitizeCellProbeTemps( + const float temps[BMS_CELL_PROBE_COUNT], + float out[BMS_CELL_PROBE_COUNT]) { + uint8_t disconnectedCount = 0; + + // First pass: count disconnected probes (NaN from library) + for (uint8_t i = 0; i < BMS_CELL_PROBE_COUNT; i++) { + if (isnan(temps[i])) disconnectedCount++; + } + + // Second pass: if within tolerance, pass NaN through (silently ignored). + // If too many are disconnected, replace excess NaN with the sentinel value + // so temperature monitors fire a CRIT_LOW alert. + uint8_t ignoredCount = 0; + for (uint8_t i = 0; i < BMS_CELL_PROBE_COUNT; i++) { + if (!isnan(temps[i])) { + out[i] = temps[i]; + } else if (ignoredCount < BMS_MAX_IGNORED_DISCONNECTED_PROBES) { + out[i] = NAN; + ignoredCount++; + } else { + out[i] = BMS_CAN::TEMP_PROBE_DISCONNECTED; + } + } +} + // External declarations extern STR_BMS_TELEMETRY_140 bmsTelemetryData; extern BMS_CAN* bms_can; diff --git a/inc/sp140/structs.h b/inc/sp140/structs.h index 29715c09..043ac821 100644 --- a/inc/sp140/structs.h +++ b/inc/sp140/structs.h @@ -76,8 +76,8 @@ typedef struct { float power; // Power (kW) float highest_cell_voltage; // Highest individual cell voltage (V) float lowest_cell_voltage; // Lowest individual cell voltage (V) - float highest_temperature; // Highest temperature reading (°C) - float lowest_temperature; // Lowest temperature reading (°C) + float highest_temperature; // Highest valid temperature reading (°C), NaN if unavailable + float lowest_temperature; // Lowest valid temperature reading (°C), NaN if unavailable float energy_cycle; // Energy per cycle (kWh) uint32_t battery_cycle; // Battery cycle count uint8_t battery_fail_level; // Battery failure status @@ -92,10 +92,10 @@ typedef struct { // Individual temperature sensors float mos_temperature; // BMS MOSFET temperature (°C) - index 0 float balance_temperature; // BMS balance resistor temperature (°C) - index 1 - float t1_temperature; // T1 cell temperature sensor (°C) - index 2 - float t2_temperature; // T2 cell temperature sensor (°C) - index 3 - float t3_temperature; // T3 cell temperature sensor (°C) - index 4 - float t4_temperature; // T4 cell temperature sensor (°C) - index 5 + float t1_temperature; // T1 cell temperature sensor (°C), NaN if disconnected - index 2 + float t2_temperature; // T2 cell temperature sensor (°C), NaN if disconnected - index 3 + float t3_temperature; // T3 cell temperature sensor (°C), NaN if disconnected - index 4 + float t4_temperature; // T4 cell temperature sensor (°C), NaN if disconnected - index 5 } STR_BMS_TELEMETRY_140; #pragma pack(pop) diff --git a/platformio.ini b/platformio.ini index 5339c367..7749a8b7 100644 --- a/platformio.ini +++ b/platformio.ini @@ -21,7 +21,7 @@ lib_ignore = [env:OpenPPG-CESP32S3-CAN-SP140] -platform = espressif32@6.12.0 +platform = espressif32@6.13.0 board = m5stack-stamps3 framework = arduino src_folder = sp140 @@ -43,19 +43,19 @@ debug_tool = esp-builtin lib_deps = Wire SPI - ArduinoJson@7.3.1 + ArduinoJson@7.4.3 Time@1.6.1 - adafruit/Adafruit BusIO@1.17.2 + adafruit/Adafruit BusIO@1.17.4 adafruit/Adafruit BMP3XX Library@2.1.6 adafruit/Adafruit ST7735 and ST7789 Library@1.11.0 - adafruit/Adafruit NeoPixel@1.15.2 - adafruit/Adafruit CAN@0.2.1 + adafruit/Adafruit NeoPixel@1.15.4 + adafruit/Adafruit CAN@0.2.3 adafruit/Adafruit MCP2515@0.2.1 https://github.com/rlogiacco/CircularBuffer@1.4.0 https://github.com/openppg/SINE-ESC-CAN#8caa93996b5d000fe10ca5265bd1c472dfdf885b - https://github.com/openppg/ANT-BMS-CAN#da685ce2a0e87e23df625f33ad751203ad6a4f8f + https://github.com/openppg/ANT-BMS-CAN#fd54852bc6f1c9608e37af9ca7c13ea4135c095b lvgl/lvgl@^8.4.0 - h2zero/NimBLE-Arduino@^2.3.5 + h2zero/NimBLE-Arduino@^2.3.9 lib_ignore = Adafruit SleepyDog Library ${extra.lib_ignore} diff --git a/src/sp140/bms.cpp b/src/sp140/bms.cpp index 768bf71a..9541ad1c 100644 --- a/src/sp140/bms.cpp +++ b/src/sp140/bms.cpp @@ -3,6 +3,34 @@ #include "sp140/globals.h" #include "sp140/lvgl/lvgl_core.h" // for spiBusMutex +namespace { + +void logBmsCellProbeConnectionTransitions(const float sanitizedCellTemps[BMS_CELL_PROBE_COUNT]) { + static bool hasPreviousState = false; + static bool wasConnected[BMS_CELL_PROBE_COUNT] = {false, false, false, false}; + + for (uint8_t i = 0; i < BMS_CELL_PROBE_COUNT; i++) { + const bool connected = !isnan(sanitizedCellTemps[i]); + + if (!hasPreviousState) { + wasConnected[i] = connected; + continue; + } + + if (connected != wasConnected[i]) { + USBSerial.printf("[BMS] T%u sensor %s (sanitized=%.1fC)\n", + i + 1, + connected ? "reconnected" : "disconnected", + sanitizedCellTemps[i]); + wasConnected[i] = connected; + } + } + + hasPreviousState = true; +} + +} // namespace + STR_BMS_TELEMETRY_140 bmsTelemetryData = { .bmsState = TelemetryState::NOT_CONNECTED }; @@ -51,10 +79,6 @@ void updateBMSData() { // Calculated highest cell minus lowest cell voltage bmsTelemetryData.voltage_differential = bms_can->getHighestCellVoltage() - bms_can->getLowestCellVoltage(); - // Temperature readings - bmsTelemetryData.highest_temperature = bms_can->getHighestTemperature(); - bmsTelemetryData.lowest_temperature = bms_can->getLowestTemperature(); - // Battery statistics bmsTelemetryData.battery_cycle = bms_can->getBatteryCycle(); bmsTelemetryData.energy_cycle = bms_can->getEnergyCycle(); @@ -70,13 +94,31 @@ void updateBMSData() { bmsTelemetryData.cell_voltages[i] = bms_can->getCellVoltage(i); } - // Populate individual temperature sensors + // Populate temperature sensors (library returns NaN for disconnected probes) bmsTelemetryData.mos_temperature = bms_can->getTemperature(0); // BMS MOSFET bmsTelemetryData.balance_temperature = bms_can->getTemperature(1); // BMS Balance resistors - bmsTelemetryData.t1_temperature = bms_can->getTemperature(2); // Cell probe 1 - bmsTelemetryData.t2_temperature = bms_can->getTemperature(3); // Cell probe 2 - bmsTelemetryData.t3_temperature = bms_can->getTemperature(4); // Cell probe 3 - bmsTelemetryData.t4_temperature = bms_can->getTemperature(5); // Cell probe 4 + + // Cell probes: library already returns NaN for disconnected. Apply app-level + // policy (tolerate up to N disconnected before alerting). + const float cellTemps[BMS_CELL_PROBE_COUNT] = { + bms_can->getTemperature(2), + bms_can->getTemperature(3), + bms_can->getTemperature(4), + bms_can->getTemperature(5) + }; + float sanitizedCellTemps[BMS_CELL_PROBE_COUNT]; + sanitizeCellProbeTemps(cellTemps, sanitizedCellTemps); + bmsTelemetryData.t1_temperature = sanitizedCellTemps[0]; + bmsTelemetryData.t2_temperature = sanitizedCellTemps[1]; + bmsTelemetryData.t3_temperature = sanitizedCellTemps[2]; + bmsTelemetryData.t4_temperature = sanitizedCellTemps[3]; + + // Emit transition logs to help field-debug intermittent probe wiring issues. + logBmsCellProbeConnectionTransitions(sanitizedCellTemps); + + // Library already excludes disconnected probes from high/low temps + bmsTelemetryData.highest_temperature = bms_can->getHighestTemperature(); + bmsTelemetryData.lowest_temperature = bms_can->getLowestTemperature(); bmsTelemetryData.lastUpdateMs = millis(); unsigned long dur = bmsTelemetryData.lastUpdateMs - tStart; diff --git a/src/sp140/lvgl/lvgl_updates.cpp b/src/sp140/lvgl/lvgl_updates.cpp index 8897c670..5b22fe55 100644 --- a/src/sp140/lvgl/lvgl_updates.cpp +++ b/src/sp140/lvgl/lvgl_updates.cpp @@ -334,11 +334,25 @@ void updateLvglMainScreen( float batteryPercent = unifiedBatteryData.soc; float totalVolts = unifiedBatteryData.volts; float lowestCellV = bmsTelemetry.lowest_cell_voltage; - // Calculate highest cell temperature from T1-T4 only (excluding MOSFET and balance temps) - float batteryTemp = bmsTelemetry.t1_temperature; - if (bmsTelemetry.t2_temperature > batteryTemp) batteryTemp = bmsTelemetry.t2_temperature; - if (bmsTelemetry.t3_temperature > batteryTemp) batteryTemp = bmsTelemetry.t3_temperature; - if (bmsTelemetry.t4_temperature > batteryTemp) batteryTemp = bmsTelemetry.t4_temperature; + // Calculate battery temp from connected T1-T4 cell probes only. + const float cellTemps[] = { + bmsTelemetry.t1_temperature, + bmsTelemetry.t2_temperature, + bmsTelemetry.t3_temperature, + bmsTelemetry.t4_temperature + }; + float batteryTemp = NAN; + bool hasValidBatteryTemp = false; + for (float cellTemp : cellTemps) { + if (isnan(cellTemp)) { + continue; + } + + if (!hasValidBatteryTemp || cellTemp > batteryTemp) { + batteryTemp = cellTemp; + hasValidBatteryTemp = true; + } + } float escTemp = escTelemetry.cap_temp; float motorTemp = escTelemetry.motor_temp; // Check if BMS or ESC is connected @@ -660,7 +674,7 @@ void updateLvglMainScreen( lv_obj_remove_style(batt_temp_bg, &style_warning, 0); lv_obj_remove_style(batt_temp_bg, &style_critical, 0); - if (bmsTelemetry.bmsState == TelemetryState::CONNECTED) { + if (bmsTelemetry.bmsState == TelemetryState::CONNECTED && hasValidBatteryTemp) { lv_label_set_text_fmt(batt_temp_label, "%d", static_cast(batteryTemp)); if (batteryTemp >= bmsCellTempThresholds.critHigh) { lv_obj_add_style(batt_temp_bg, &style_critical, 0); @@ -672,6 +686,7 @@ void updateLvglMainScreen( lv_obj_add_flag(batt_temp_bg, LV_OBJ_FLAG_HIDDEN); } } else { + // No valid cell probe connected: show "-" instead of a fake low reading. lv_label_set_text(batt_temp_label, "-"); lv_obj_add_flag(batt_temp_bg, LV_OBJ_FLAG_HIDDEN); } diff --git a/test/native_stubs/BMS_CAN.h b/test/native_stubs/BMS_CAN.h new file mode 100644 index 00000000..7eb11067 --- /dev/null +++ b/test/native_stubs/BMS_CAN.h @@ -0,0 +1,9 @@ +#pragma once + +#include +#include + +class BMS_CAN { +public: + static constexpr float TEMP_PROBE_DISCONNECTED = -40.0f; +}; diff --git a/test/native_stubs/SPI.h b/test/native_stubs/SPI.h new file mode 100644 index 00000000..f74ca657 --- /dev/null +++ b/test/native_stubs/SPI.h @@ -0,0 +1,3 @@ +#pragma once + +class SPIClass {}; diff --git a/test/test_simplemonitor/test_simplemonitor.cpp b/test/test_simplemonitor/test_simplemonitor.cpp index f3d6ed8f..380f5538 100644 --- a/test/test_simplemonitor/test_simplemonitor.cpp +++ b/test/test_simplemonitor/test_simplemonitor.cpp @@ -1,4 +1,5 @@ #include +#include "sp140/bms.h" #include "sp140/simple_monitor.h" #include "sp140/monitor_config.h" @@ -237,6 +238,166 @@ TEST(SimpleMonitor, BMSTemperatureAlerts) { EXPECT_EQ(logger.entries.back().lvl, AlertLevel::CRIT_HIGH); } +TEST(SimpleMonitor, BMSCellTemperatureDisconnectedIsIgnored) { + FakeLogger logger; + float fakeTemp = 30.0f; + + SensorMonitor cellTempMon( + SensorID::BMS_T1_Temp, + SensorCategory::BMS, + bmsCellTempThresholds, + [&]() { return fakeTemp; }, + &logger); + + cellTempMon.check(); // Baseline OK + EXPECT_TRUE(logger.entries.empty()); + + // Disconnected probe reading should be treated as invalid (sanitized to NaN upstream). + fakeTemp = NAN; + cellTempMon.check(); + EXPECT_TRUE(logger.entries.empty()); + + // Remaining connected probes still report alerts normally. + fakeTemp = 54.0f; + cellTempMon.check(); + ASSERT_EQ(logger.entries.size(), 1u); + EXPECT_EQ(logger.entries.back().lvl, AlertLevel::WARN_HIGH); +} + +TEST(SimpleMonitor, BMSCellTemperatureDisconnectClearsActiveAlert) { + FakeLogger logger; + float fakeTemp = 55.0f; + + SensorMonitor cellTempMon( + SensorID::BMS_T2_Temp, + SensorCategory::BMS, + bmsCellTempThresholds, + [&]() { return fakeTemp; }, + &logger); + + // Enter warning state first. + cellTempMon.check(); + ASSERT_EQ(logger.entries.size(), 1u); + EXPECT_EQ(logger.entries.back().lvl, AlertLevel::WARN_HIGH); + + // Disconnect should clear the active alert. + fakeTemp = NAN; + cellTempMon.check(); + ASSERT_EQ(logger.entries.size(), 2u); + EXPECT_EQ(logger.entries.back().lvl, AlertLevel::OK); +} + +// sanitizeCellProbeTemps now receives NaN from the library for disconnected +// probes. It applies app policy: up to 2 NaN are passed through silently; +// beyond that, NaN is replaced with the sentinel so temp monitors alert. + +TEST(SimpleMonitor, BMSCellProbeSanitizerIgnoresUpToTwoDisconnected) { + // Two disconnected (NaN from library), two connected + const float temps[BMS_CELL_PROBE_COUNT] = {NAN, -39.0f, NAN, 12.0f}; + float out[BMS_CELL_PROBE_COUNT] = {}; + + sanitizeCellProbeTemps(temps, out); + + EXPECT_TRUE(isnan(out[0])); + EXPECT_FLOAT_EQ(out[1], -39.0f); + EXPECT_TRUE(isnan(out[2])); + EXPECT_FLOAT_EQ(out[3], 12.0f); +} + +TEST(SimpleMonitor, BMSCellProbeSanitizerAlertsOnThirdDisconnected) { + // Three disconnected: first two become NaN, third becomes sentinel for alert + const float temps[BMS_CELL_PROBE_COUNT] = {NAN, NAN, NAN, 25.0f}; + float out[BMS_CELL_PROBE_COUNT] = {}; + + sanitizeCellProbeTemps(temps, out); + + EXPECT_TRUE(isnan(out[0])); + EXPECT_TRUE(isnan(out[1])); + EXPECT_FLOAT_EQ(out[2], BMS_CAN::TEMP_PROBE_DISCONNECTED); + EXPECT_FLOAT_EQ(out[3], 25.0f); +} + +TEST(SimpleMonitor, BMSCellProbeSanitizerAlertsOnThirdAndFourthDisconnected) { + // All four disconnected: first two NaN, third and fourth become sentinel + const float temps[BMS_CELL_PROBE_COUNT] = {NAN, NAN, NAN, NAN}; + float out[BMS_CELL_PROBE_COUNT] = {}; + + sanitizeCellProbeTemps(temps, out); + + EXPECT_TRUE(isnan(out[0])); + EXPECT_TRUE(isnan(out[1])); + EXPECT_FLOAT_EQ(out[2], BMS_CAN::TEMP_PROBE_DISCONNECTED); + EXPECT_FLOAT_EQ(out[3], BMS_CAN::TEMP_PROBE_DISCONNECTED); +} + +TEST(SimpleMonitor, BMSCellProbeSanitizerPassesThroughValidTemps) { + const float temps[BMS_CELL_PROBE_COUNT] = {10.0f, 20.0f, 30.0f, 40.0f}; + float out[BMS_CELL_PROBE_COUNT] = {}; + + sanitizeCellProbeTemps(temps, out); + + EXPECT_FLOAT_EQ(out[0], 10.0f); + EXPECT_FLOAT_EQ(out[1], 20.0f); + EXPECT_FLOAT_EQ(out[2], 30.0f); + EXPECT_FLOAT_EQ(out[3], 40.0f); +} + +TEST(SimpleMonitor, BMSCellProbeDisconnectTransitionTriggersAndClearsAlert) { + FakeLogger logger; + float sanitizedTemps[BMS_CELL_PROBE_COUNT] = {NAN, NAN, NAN, NAN}; + + SensorMonitor t1Mon( + SensorID::BMS_T1_Temp, + SensorCategory::BMS, + bmsCellTempThresholds, + [&]() { return sanitizedTemps[0]; }, + &logger); + SensorMonitor t2Mon( + SensorID::BMS_T2_Temp, + SensorCategory::BMS, + bmsCellTempThresholds, + [&]() { return sanitizedTemps[1]; }, + &logger); + SensorMonitor t3Mon( + SensorID::BMS_T3_Temp, + SensorCategory::BMS, + bmsCellTempThresholds, + [&]() { return sanitizedTemps[2]; }, + &logger); + SensorMonitor t4Mon( + SensorID::BMS_T4_Temp, + SensorCategory::BMS, + bmsCellTempThresholds, + [&]() { return sanitizedTemps[3]; }, + &logger); + + const auto evaluateCycle = [&](const float temps[BMS_CELL_PROBE_COUNT]) { + sanitizeCellProbeTemps(temps, sanitizedTemps); + t1Mon.check(); + t2Mon.check(); + t3Mon.check(); + t4Mon.check(); + }; + + // Two disconnected probes (NaN) are within tolerance - no alert. + const float twoDisconnected[BMS_CELL_PROBE_COUNT] = {NAN, NAN, 5.0f, 6.0f}; + evaluateCycle(twoDisconnected); + EXPECT_TRUE(logger.entries.empty()); + + // Third disconnect exceeds tolerance - sentinel triggers CRIT_LOW alert. + const float threeDisconnected[BMS_CELL_PROBE_COUNT] = {NAN, NAN, NAN, 6.0f}; + evaluateCycle(threeDisconnected); + ASSERT_EQ(logger.entries.size(), 1u); + EXPECT_EQ(logger.entries[0].id, SensorID::BMS_T3_Temp); + EXPECT_EQ(logger.entries[0].lvl, AlertLevel::CRIT_LOW); + + // Returning to two disconnected clears the alert. + evaluateCycle(twoDisconnected); + ASSERT_EQ(logger.entries.size(), 2u); + EXPECT_EQ(logger.entries[1].id, SensorID::BMS_T3_Temp); + EXPECT_EQ(logger.entries[1].lvl, AlertLevel::OK); +} + TEST(SimpleMonitor, MonitorIDAccess) { FakeLogger logger;