fix(modbus): zadne vecne pending v journalu + flock timeout + EV poll backoff
Zivy incident home-01 (TeltoCharge .16): zapis 15/19-20 koncil failed s prazdnym error_msg, nebo zustal trvale pending a zablokoval exportni ticky. - _gateway_exclusive: neblokujici flock s deadline (EMS_MODBUS_FLOCK_TIMEOUT_S, default 20 s) -> GatewayLockTimeout misto starvation bez limitu - execute_modbus_commands: invariant written/failed + neprazdny error_msg (str(e) or repr(e)); safety net pres BaseException (CancelledError, chyba DB); journal update mimo retry cyklus zarizeni; force_disconnect bez zamku brany - telemetry poll_ev_chargers: po 3 selhanich backoff 5 min per (host,port,unit) - mrtvy unit_id drzi branu 4x8=32 s z kazde minuty - testy backend/tests/test_modbus_execute_failsafe.py; docs modbus-command-journal.md (sekce Robustnost zapisu + konfigurace) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import asyncpg
|
||||
@@ -264,6 +265,35 @@ DEYE_REG_CONTROL_BOARD_SPECIAL1 = 178
|
||||
TELTO_REG_BLOCK_START = 0
|
||||
TELTO_REG_BLOCK_COUNT = 41
|
||||
|
||||
#: Backoff for an unreachable wallbox: reading a dead unit_id holds the exclusive
|
||||
#: gateway lock for up to (retries+1)×timeout = 4×8 = 32 s (pymodbus) — every minute.
|
||||
#: After EV_POLL_FAIL_THRESHOLD consecutive failures, the poll of a given (host,port,unit)
|
||||
#: is attempted only once per EV_POLL_BACKOFF_S; a successful read resets the backoff.
|
||||
EV_POLL_FAIL_THRESHOLD = 3
|
||||
EV_POLL_BACKOFF_S = 300.0
|
||||
#: Number of consecutive failed polls, keyed by (host, port, unit_id).
_EV_POLL_FAIL_STREAK: dict[tuple[str, int, int], int] = {}
|
||||
#: Monotonic-clock deadline before which the key's poll is skipped; set once the
#: failure streak reaches EV_POLL_FAIL_THRESHOLD.
_EV_POLL_NEXT_ATTEMPT: dict[tuple[str, int, int], float] = {}
|
||||
|
||||
|
||||
def _ev_poll_should_skip(key: tuple[str, int, int], now_mono: float) -> bool:
    """Tell whether the poll identified by *key* is currently in backoff.

    Args:
        key: ``(host, port, unit_id)`` tuple identifying the polled device.
        now_mono: Current monotonic-clock reading, in seconds.

    Returns:
        ``True`` only when the recorded failure streak has reached
        ``EV_POLL_FAIL_THRESHOLD`` and *now_mono* is still earlier than the
        stored next-attempt deadline; ``False`` otherwise.
    """
    # Below the threshold the device is polled normally, whatever the deadline.
    if _EV_POLL_FAIL_STREAK.get(key, 0) < EV_POLL_FAIL_THRESHOLD:
        return False
    retry_at = _EV_POLL_NEXT_ATTEMPT.get(key, 0.0)
    return now_mono < retry_at
|
||||
|
||||
|
||||
def _ev_poll_record_failure(key: tuple[str, int, int], now_mono: float) -> int:
    """Register one more failed poll for *key* and arm the backoff if needed.

    Args:
        key: ``(host, port, unit_id)`` tuple identifying the polled device.
        now_mono: Current monotonic-clock reading, in seconds.

    Returns:
        The updated number of consecutive failures for *key*.
    """
    failures = 1 + _EV_POLL_FAIL_STREAK.get(key, 0)
    _EV_POLL_FAIL_STREAK[key] = failures
    if failures < EV_POLL_FAIL_THRESHOLD:
        return failures
    # Threshold reached (or already exceeded): push the next attempt out by
    # one full backoff window, counted from this failure.
    _EV_POLL_NEXT_ATTEMPT[key] = now_mono + EV_POLL_BACKOFF_S
    return failures
|
||||
|
||||
|
||||
def _ev_poll_record_success(key: tuple[str, int, int]) -> None:
    """Clear all backoff state for *key* after a successful read.

    Args:
        key: ``(host, port, unit_id)`` tuple identifying the polled device.
    """
    # Drop both the streak counter and the deadline; missing entries are fine.
    for table in (_EV_POLL_FAIL_STREAK, _EV_POLL_NEXT_ATTEMPT):
        table.pop(key, None)
|
||||
|
||||
#: EVSE status (reg 6) → interní stav; session detekce stojí na 'available' vs ≠'available'
|
||||
#: (fn_ev_session_transition), proto každý stav s připojeným EV musí být ≠ 'available'.
|
||||
TELTO_STATUS_MAP = {
|
||||
@@ -430,6 +460,15 @@ async def poll_ev_chargers(site_id: int, db: asyncpg.Connection) -> None:
|
||||
port = int(row["port"] or 502)
|
||||
unit_id = int(row["unit_id"] if row["unit_id"] is not None else 1)
|
||||
|
||||
poll_key = (str(host), port, unit_id)
|
||||
now_mono = time.monotonic()
|
||||
if _ev_poll_should_skip(poll_key, now_mono):
|
||||
logger.debug(
|
||||
"EV charger %s (%s:%s u%s) in backoff, poll skipped",
|
||||
code, host, port, unit_id,
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
client = await get_modbus_client(host, port)
|
||||
async with client.batch(unit_id) as mb:
|
||||
@@ -440,8 +479,19 @@ async def poll_ev_chargers(site_id: int, db: asyncpg.Connection) -> None:
|
||||
except Exception as e:
|
||||
# Při výpadku čtení NIC nezapisovat — fabrikovaný 'available' by
|
||||
# falešně ukončoval EV session a špinil bazál (power 0).
|
||||
logger.warning("EV charger %s (%s:%s) read failed: %s", code, host, port, e)
|
||||
streak = _ev_poll_record_failure(poll_key, time.monotonic())
|
||||
backoff = (
|
||||
f" (streak {streak} >= {EV_POLL_FAIL_THRESHOLD}, "
|
||||
f"backoff {EV_POLL_BACKOFF_S:.0f}s — neblokovat bránu)"
|
||||
if streak >= EV_POLL_FAIL_THRESHOLD
|
||||
else ""
|
||||
)
|
||||
logger.warning(
|
||||
"EV charger %s (%s:%s u%s) read failed: %s%s",
|
||||
code, host, port, unit_id, e, backoff,
|
||||
)
|
||||
continue
|
||||
_ev_poll_record_success(poll_key)
|
||||
|
||||
current_status = str(frame["status"])
|
||||
if frame["error_bits"]:
|
||||
|
||||
Reference in New Issue
Block a user