mirror of
https://github.com/firestar5683/StarPilot.git
synced 2026-07-03 04:22:09 +08:00
Increase Fault Resilience
This commit is contained in:
+19
-15
@@ -804,7 +804,8 @@ class Panda:
|
||||
# The panda will NAK CAN writes when there is CAN congestion.
|
||||
# libusb will try to send it again, with a max timeout.
|
||||
# Timeout is in ms. If set to 0, the timeout is infinite.
|
||||
CAN_SEND_TIMEOUT_MS = 10
|
||||
CAN_SEND_TIMEOUT_MS = 5
|
||||
CAN_MAX_RETRIES = 3
|
||||
|
||||
def can_reset_communications(self):
|
||||
self._handle.controlWrite(Panda.REQUEST_OUT, 0xc0, 0, 0, b'')
|
||||
@@ -812,18 +813,18 @@ class Panda:
|
||||
@ensure_can_packet_version
|
||||
def can_send_many(self, arr, timeout=CAN_SEND_TIMEOUT_MS):
|
||||
snds = pack_can_buffer(arr)
|
||||
while True:
|
||||
try:
|
||||
for tx in snds:
|
||||
while True:
|
||||
bs = self._handle.bulkWrite(3, tx, timeout=timeout)
|
||||
tx = tx[bs:]
|
||||
if len(tx) == 0:
|
||||
break
|
||||
logging.error("CAN: PARTIAL SEND MANY, RETRYING")
|
||||
break
|
||||
except (usb1.USBErrorIO, usb1.USBErrorOverflow):
|
||||
logging.error("CAN: BAD SEND MANY, RETRYING")
|
||||
for tx in snds:
|
||||
retries = 0
|
||||
while len(tx) > 0:
|
||||
bs = self._handle.bulkWrite(3, tx, timeout=timeout)
|
||||
if bs == 0:
|
||||
retries += 1
|
||||
if retries > self.CAN_MAX_RETRIES:
|
||||
logging.warning("CAN send: no progress after retries, dropping")
|
||||
break
|
||||
else:
|
||||
retries = 0
|
||||
tx = tx[bs:]
|
||||
|
||||
def can_send(self, addr, dat, bus, timeout=CAN_SEND_TIMEOUT_MS):
|
||||
self.can_send_many([[addr, None, dat, bus]], timeout=timeout)
|
||||
@@ -831,13 +832,16 @@ class Panda:
|
||||
@ensure_can_packet_version
|
||||
def can_recv(self):
|
||||
dat = bytearray()
|
||||
while True:
|
||||
for _ in range(self.CAN_MAX_RETRIES):
|
||||
try:
|
||||
dat = self._handle.bulkRead(1, 16384) # Max receive batch size + 2 extra reserve frames
|
||||
break
|
||||
except (usb1.USBErrorIO, usb1.USBErrorOverflow):
|
||||
logging.error("CAN: BAD RECV, RETRYING")
|
||||
time.sleep(0.1)
|
||||
time.sleep(0.01)
|
||||
else:
|
||||
logging.error("CAN: recv failed after retries")
|
||||
return []
|
||||
msgs, self.can_rx_overflow_buffer = unpack_can_buffer(self.can_rx_overflow_buffer + dat)
|
||||
return msgs
|
||||
|
||||
|
||||
+12
-1
@@ -27,7 +27,10 @@ NACK = 0x1F
|
||||
CHECKSUM_START = 0xAB
|
||||
|
||||
MIN_ACK_TIMEOUT_MS = 100
|
||||
MAX_ACK_TIMEOUT_MS = 500 # like C++ SPI_ACK_TIMEOUT
|
||||
DEFAULT_TIMEOUT_MS = 500 # default when timeout=0
|
||||
MAX_XFER_RETRY_COUNT = 5
|
||||
MAX_TIMEOUT_RETRIES = 5 # like C++
|
||||
|
||||
XFER_SIZE = 0x40*31
|
||||
|
||||
@@ -152,6 +155,8 @@ class PandaSpiHandle(BaseHandle):
|
||||
return cksum
|
||||
|
||||
def _wait_for_ack(self, spi, ack_val: int, timeout: int, tx: int, length: int = 1) -> bytes:
|
||||
# Original behavior preserved - timeout=0 means wait forever within this function
|
||||
# The caller (_transfer) handles the overall timeout
|
||||
timeout_s = max(MIN_ACK_TIMEOUT_MS, timeout) * 1e-3
|
||||
|
||||
start = time.monotonic()
|
||||
@@ -225,10 +230,15 @@ class PandaSpiHandle(BaseHandle):
|
||||
logging.debug("starting transfer: endpoint=%d, max_rx_len=%d", endpoint, max_rx_len)
|
||||
logging.debug("==============================================")
|
||||
|
||||
# Fix timeout=0 infinite loop: default to DEFAULT_TIMEOUT_MS
|
||||
if timeout == 0:
|
||||
timeout = DEFAULT_TIMEOUT_MS
|
||||
|
||||
n = 0
|
||||
start_time = time.monotonic()
|
||||
exc = PandaSpiException()
|
||||
while (timeout == 0) or (time.monotonic() - start_time) < timeout*1e-3:
|
||||
# Use the timeout for the overall loop, matching original behavior but with timeout=0 fixed
|
||||
while (time.monotonic() - start_time) < timeout * 1e-3:
|
||||
n += 1
|
||||
logging.debug("\ntry #%d", n)
|
||||
with self.dev.acquire() as spi:
|
||||
@@ -238,6 +248,7 @@ class PandaSpiHandle(BaseHandle):
|
||||
exc = e
|
||||
logging.debug("SPI transfer failed, retrying", exc_info=True)
|
||||
|
||||
logging.error("SPI transfer failed after %d tries, %.2fms", n, (time.monotonic() - start_time) * 1000)
|
||||
raise exc
|
||||
|
||||
def get_protocol_version(self) -> bytes:
|
||||
|
||||
Reference in New Issue
Block a user