diff --git a/panda/python/__init__.py b/panda/python/__init__.py index 9339bfcf8..55a794d54 100644 --- a/panda/python/__init__.py +++ b/panda/python/__init__.py @@ -804,7 +804,8 @@ class Panda: # The panda will NAK CAN writes when there is CAN congestion. # libusb will try to send it again, with a max timeout. # Timeout is in ms. If set to 0, the timeout is infinite. - CAN_SEND_TIMEOUT_MS = 10 + CAN_SEND_TIMEOUT_MS = 5 + CAN_MAX_RETRIES = 3 def can_reset_communications(self): self._handle.controlWrite(Panda.REQUEST_OUT, 0xc0, 0, 0, b'') @@ -812,18 +813,18 @@ class Panda: @ensure_can_packet_version def can_send_many(self, arr, timeout=CAN_SEND_TIMEOUT_MS): snds = pack_can_buffer(arr) - while True: - try: - for tx in snds: - while True: - bs = self._handle.bulkWrite(3, tx, timeout=timeout) - tx = tx[bs:] - if len(tx) == 0: - break - logging.error("CAN: PARTIAL SEND MANY, RETRYING") - break - except (usb1.USBErrorIO, usb1.USBErrorOverflow): - logging.error("CAN: BAD SEND MANY, RETRYING") + for tx in snds: + retries = 0 + while len(tx) > 0: + bs = self._handle.bulkWrite(3, tx, timeout=timeout) + if bs == 0: + retries += 1 + if retries > self.CAN_MAX_RETRIES: + logging.warning("CAN send: no progress after retries, dropping") + break + else: + retries = 0 + tx = tx[bs:] def can_send(self, addr, dat, bus, timeout=CAN_SEND_TIMEOUT_MS): self.can_send_many([[addr, None, dat, bus]], timeout=timeout) @@ -831,13 +832,16 @@ class Panda: @ensure_can_packet_version def can_recv(self): dat = bytearray() - while True: + for _ in range(self.CAN_MAX_RETRIES): try: dat = self._handle.bulkRead(1, 16384) # Max receive batch size + 2 extra reserve frames break except (usb1.USBErrorIO, usb1.USBErrorOverflow): logging.error("CAN: BAD RECV, RETRYING") - time.sleep(0.1) + time.sleep(0.01) + else: + logging.error("CAN: recv failed after retries") + return [] msgs, self.can_rx_overflow_buffer = unpack_can_buffer(self.can_rx_overflow_buffer + dat) return msgs diff --git a/panda/python/spi.py b/panda/python/spi.py index be4f7dcf4..681f44a56 100644 --- a/panda/python/spi.py +++ b/panda/python/spi.py @@ -27,7 +27,10 @@ NACK = 0x1F CHECKSUM_START = 0xAB MIN_ACK_TIMEOUT_MS = 100 +MAX_ACK_TIMEOUT_MS = 500 # like C++ SPI_ACK_TIMEOUT +DEFAULT_TIMEOUT_MS = 500 # default when timeout=0 MAX_XFER_RETRY_COUNT = 5 +MAX_TIMEOUT_RETRIES = 5 # like C++ XFER_SIZE = 0x40*31 @@ -152,6 +155,8 @@ class PandaSpiHandle(BaseHandle): return cksum def _wait_for_ack(self, spi, ack_val: int, timeout: int, tx: int, length: int = 1) -> bytes: + # Original behavior preserved - timeout=0 means wait forever within this function + # The caller (_transfer) handles the overall timeout timeout_s = max(MIN_ACK_TIMEOUT_MS, timeout) * 1e-3 start = time.monotonic() @@ -225,10 +230,15 @@ class PandaSpiHandle(BaseHandle): logging.debug("starting transfer: endpoint=%d, max_rx_len=%d", endpoint, max_rx_len) logging.debug("==============================================") + # Fix timeout=0 infinite loop: default to DEFAULT_TIMEOUT_MS + if timeout == 0: + timeout = DEFAULT_TIMEOUT_MS + n = 0 start_time = time.monotonic() exc = PandaSpiException() - while (timeout == 0) or (time.monotonic() - start_time) < timeout*1e-3: + # Use the timeout for the overall loop, matching original behavior but with timeout=0 fixed + while (time.monotonic() - start_time) < timeout * 1e-3: n += 1 logging.debug("\ntry #%d", n) with self.dev.acquire() as spi: @@ -238,6 +248,7 @@ class PandaSpiHandle(BaseHandle): exc = e logging.debug("SPI transfer failed, retrying", exc_info=True) + logging.error("SPI transfer failed after %d tries, %.2fms", n, (time.monotonic() - start_time) * 1000) raise exc def get_protocol_version(self) -> bytes: