Refactor type annotations and return types in model_runner

Removed specific type hints and return annotations for `self.inputs` and `run_model` methods to enhance flexibility and maintain consistency. These changes streamline the code and improve compatibility with varying input/output types during model inference.
Refactor type hints for improved code consistency.
2026-08-06 21:55:41 +08:00 · 2024-12-29 18:04:05 +01:00 · 2024-12-29 17:59:50 +01:00 · 2024-12-29 17:51:30 +01:00 · 2024-12-29 17:50:58 +01:00 · 2024-12-29 17:48:00 +01:00
257 changed files with 485 additions and 9331 deletions
@@ -1,2 +0,0 @@
-Wen
-REGIST
@@ -86,7 +86,7 @@ jobs:
        run: >-
          sudo apt-get install -y imagemagick

-          scenes="homescreen settings_device settings_software settings_sunnylink settings_toggles settings_sunnypilot settings_sunnypilot_mads settings_developer offroad_alert update_available prime onroad onroad_disengaged onroad_override onroad_sidebar onroad_wide onroad_wide_sidebar onroad_alert_small onroad_alert_mid onroad_alert_full driver_camera body keyboard"
+          scenes="homescreen settings_device settings_software settings_toggles settings_developer offroad_alert update_available prime onroad onroad_disengaged onroad_override onroad_sidebar onroad_wide onroad_wide_sidebar onroad_alert_small onroad_alert_mid onroad_alert_full driver_camera body keyboard"
          A=($scenes)

          DIFF=""
@@ -74,12 +74,6 @@ AddOption('--minimal',
          default=os.path.exists(File('#.lfsconfig').abspath), # minimal by default on release branch (where there's no LFS)
          help='the minimum build to run openpilot. no tests, tools, etc.')

-AddOption('--stock-ui',
-          action='store_true',
-          dest='stock_ui',
-          default=False,
-          help='Build stock openpilot UI instead of sunnypilot UI')
-
 ## Architecture name breakdown (arch)
 ## - larch64: linux tici aarch64
 ## - aarch64: linux pc aarch64
@@ -178,10 +172,6 @@ else:
 if arch != "Darwin":
  ldflags += ["-Wl,--as-needed", "-Wl,--no-undefined"]

-if not GetOption('stock_ui'):
-  cflags += ["-DSUNNYPILOT"]
-  cxxflags += ["-DSUNNYPILOT"]
-
 ccflags_option = GetOption('ccflags')
 if ccflags_option:
  ccflags += ccflags_option.split(' ')
@@ -8,72 +8,10 @@ $Cxx.namespace("cereal");
 # cereal, so use these if you want custom events in your fork.

 # you can rename the struct, but don't change the identifier
-struct SelfdriveStateSP @0x81c2f05a394cf4af {
-  mads @0 :ModularAssistiveDrivingSystem;
-
-  struct ModularAssistiveDrivingSystem {
-    state @0 :ModularAssistiveDrivingSystemState;
-    enabled @1 :Bool;
-    active @2 :Bool;
-    available @3 :Bool;
-
-    enum ModularAssistiveDrivingSystemState {
-      disabled @0;
-      paused @1;
-      enabled @2;
-      softDisabling @3;
-      overriding @4;
-    }
-  }
+struct CustomReserved0 @0x81c2f05a394cf4af {
 }

-struct ModelManagerSP @0xaedffd8f31e7b55d {
-  activeBundle @0 :ModelBundle;
-  selectedBundle @1 :ModelBundle;
-  availableBundles @2 :List(ModelBundle);
-
-  struct DownloadUri {
-    uri @0 :Text;
-    sha256 @1 :Text;
-  }
-
-  enum Type {
-    drive @0;
-    navigation @1;
-    metadata @2;
-  }
-
-  struct Model {
-    fullName @0 :Text;
-    fileName @1 :Text;
-    downloadUri @2 :DownloadUri;
-    downloadProgress @3 :DownloadProgress;
-    type @4 :Type;
-  }
-
-  enum DownloadStatus {
-    notDownloading @0;
-    downloading @1;
-    downloaded @2;
-    cached @3;
-    failed @4;
-  }
-
-  struct DownloadProgress {
-    status @0 :DownloadStatus;
-    progress @1 :Float32;
-    eta @2 :UInt32;
-  }
-
-  struct ModelBundle {
-    index @0 :UInt32;
-    internalName @1 :Text;
-    displayName @2 :Text;
-    models @3 :List(Model);
-    status @4 :DownloadStatus;
-    generation @5 :UInt32;
-    environment @6 :Text;
-  }
+struct CustomReserved1 @0xaedffd8f31e7b55d {
 }

 struct CustomReserved2 @0xf35cc4560bbf6ec2 {
@@ -125,79 +125,6 @@ struct OnroadEvent @0xc4fa6047f024e718 {
    espActive @90;
    personalityChanged @91;
    aeb @92;
-    eventReserved93 @93;
-    eventReserved94 @94;
-    eventReserved95 @95;
-    eventReserved96 @96;
-    eventReserved97 @97;
-    eventReserved98 @98;
-    eventReserved99 @99;
-    eventReserved100 @100;
-    eventReserved101 @101;
-    eventReserved102 @102;
-    eventReserved103 @103;
-    eventReserved104 @104;
-    eventReserved105 @105;
-    eventReserved106 @106;
-    eventReserved107 @107;
-    eventReserved108 @108;
-    eventReserved109 @109;
-    eventReserved110 @110;
-    eventReserved111 @111;
-    eventReserved112 @112;
-    eventReserved113 @113;
-    eventReserved114 @114;
-    eventReserved115 @115;
-    eventReserved116 @116;
-    eventReserved117 @117;
-    eventReserved118 @118;
-    eventReserved119 @119;
-    eventReserved120 @120;
-    eventReserved121 @121;
-    eventReserved122 @122;
-    eventReserved123 @123;
-    eventReserved124 @124;
-    eventReserved125 @125;
-    eventReserved126 @126;
-    eventReserved127 @127;
-    eventReserved128 @128;
-    eventReserved129 @129;
-    eventReserved130 @130;
-    eventReserved131 @131;
-    eventReserved132 @132;
-    eventReserved133 @133;
-    eventReserved134 @134;
-    eventReserved135 @135;
-    eventReserved136 @136;
-    eventReserved137 @137;
-    eventReserved138 @138;
-    eventReserved139 @139;
-    eventReserved140 @140;
-    eventReserved141 @141;
-    eventReserved142 @142;
-    eventReserved143 @143;
-    eventReserved144 @144;
-    eventReserved145 @145;
-    eventReserved146 @146;
-    eventReserved147 @147;
-    eventReserved148 @148;
-    eventReserved149 @149;
-    eventReserved150 @150;
-
-    # sunnypilot
-    lkasEnable @151;
-    lkasDisable @152;
-    manualSteeringRequired @153;
-    manualLongitudinalRequired @154;
-    silentLkasEnable @155;
-    silentLkasDisable @156;
-    silentBrakeHold @157;
-    silentWrongGear @158;
-    silentReverseGear @159;
-    silentDoorOpen @160;
-    silentSeatbeltNotLatched @161;
-    silentParkBrake @162;
-    controlsMismatchLateral @163;

    soundsUnavailableDEPRECATED @47;
  }
@@ -662,7 +589,6 @@ struct PandaState @0xa7649e2575e4591e {

  # safety stuff
  controlsAllowed @3 :Bool;
-  controlsAllowedLat @5 :Bool;
  safetyRxInvalid @19 :UInt32;
  safetyTxBlocked @24 :UInt32;
  safetyModel @14 :Car.CarParams.SafetyModel;
@@ -770,6 +696,7 @@ struct PandaState @0xa7649e2575e4591e {
  }

  gasInterceptorDetectedDEPRECATED @4 :Bool;
+  startedSignalDetectedDEPRECATED @5 :Bool;
  hasGpsDEPRECATED @6 :Bool;
  gmlanSendErrsDEPRECATED @9 :UInt32;
  fanSpeedRpmDEPRECATED @11 :UInt16;
@@ -2631,8 +2558,8 @@ struct Event {
    customReservedRawData2 @126 :Data;

    # *********** Custom: reserved for forks ***********
-    selfdriveStateSP @107 :Custom.SelfdriveStateSP;
-    modelManagerSP @108 :Custom.ModelManagerSP;
+    customReserved0 @107 :Custom.CustomReserved0;
+    customReserved1 @108 :Custom.CustomReserved1;
    customReserved2 @109 :Custom.CustomReserved2;
    customReserved3 @110 :Custom.CustomReserved3;
    customReserved4 @111 :Custom.CustomReserved4;
@@ -74,10 +74,6 @@ _services: dict[str, tuple] = {
  "userFlag": (True, 0., 1),
  "microphone": (True, 10., 10),

-  # sunnypilot
-  "modelManagerSP": (False, 1., 1),
-  "selfdriveStateSP": (True, 100., 10),
-
  # debug
  "uiDebug": (True, 0., 1),
  "testJoystick": (True, 0.),
@@ -1,27 +1,46 @@
-from openpilot.common.api.comma_connect import CommaConnectApi
-from sunnypilot.sunnylink.api import SunnylinkApi
+import jwt
+import os
+import requests
+from datetime import datetime, timedelta, UTC
+from openpilot.system.hardware.hw import Paths
+from openpilot.system.version import get_version

+API_HOST = os.getenv('API_HOST', 'https://api.commadotai.com')

 class Api:
-  def __init__(self, dongle_id, use_sunnylink=False):
-    if use_sunnylink:
-      self.service = SunnylinkApi(dongle_id)
-    else:
-      self.service = CommaConnectApi(dongle_id)
-
-  def request(self, method, endpoint, **params):
-    return self.service.request(method, endpoint, **params)
+  def __init__(self, dongle_id):
+    self.dongle_id = dongle_id
+    with open(Paths.persist_root()+'/comma/id_rsa') as f:
+      self.private_key = f.read()

  def get(self, *args, **kwargs):
-    return self.service.get(*args, **kwargs)
+    return self.request('GET', *args, **kwargs)

  def post(self, *args, **kwargs):
-    return self.service.post(*args, **kwargs)
+    return self.request('POST', *args, **kwargs)
+
+  def request(self, method, endpoint, timeout=None, access_token=None, **params):
+    return api_get(endpoint, method=method, timeout=timeout, access_token=access_token, **params)

  def get_token(self, expiry_hours=1):
-    return self.service.get_token(expiry_hours)
+    now = datetime.now(UTC).replace(tzinfo=None)
+    payload = {
+      'identity': self.dongle_id,
+      'nbf': now,
+      'iat': now,
+      'exp': now + timedelta(hours=expiry_hours)
+    }
+    token = jwt.encode(payload, self.private_key, algorithm='RS256')
+    if isinstance(token, bytes):
+      token = token.decode('utf8')
+    return token


-def api_get(endpoint, method='GET', timeout=None, access_token=None, use_sunnylink=False, **params):
-  return SunnylinkApi(None).api_get(endpoint, method, timeout, access_token, **params) if use_sunnylink \
-    else CommaConnectApi(None).api_get(endpoint, method, timeout, access_token, **params)
+def api_get(endpoint, method='GET', timeout=None, access_token=None, **params):
+  headers = {}
+  if access_token is not None:
+    headers['Authorization'] = "JWT " + access_token
+
+  headers['User-Agent'] = "openpilot-" + get_version()
+
+  return requests.request(method, API_HOST + "/" + endpoint, timeout=timeout, headers=headers, params=params)
@@ -1,56 +0,0 @@
-import jwt
-import requests
-import unicodedata
-from datetime import datetime, timedelta, UTC
-from openpilot.system.hardware.hw import Paths
-from openpilot.system.version import get_version
-
-
-class BaseApi:
-  def __init__(self, dongle_id, api_host, user_agent="openpilot-"):
-    self.dongle_id = dongle_id
-    self.api_host = api_host
-    self.user_agent = user_agent
-    with open(f'{Paths.persist_root()}/comma/id_rsa') as f:
-      self.private_key = f.read()
-
-  def get(self, *args, **kwargs):
-    return self.request('GET', *args, **kwargs)
-
-  def post(self, *args, **kwargs):
-    return self.request('POST', *args, **kwargs)
-
-  def request(self, method, endpoint, timeout=None, access_token=None, **params):
-    return self.api_get(endpoint, method=method, timeout=timeout, access_token=access_token, **params)
-
-  def _get_token(self, expiry_hours=1, **extra_payload):
-    now = datetime.now(UTC).replace(tzinfo=None)
-    payload = {
-      'identity': self.dongle_id,
-      'nbf': now,
-      'iat': now,
-      'exp': now + timedelta(hours=expiry_hours),
-      **extra_payload
-    }
-    token = jwt.encode(payload, self.private_key, algorithm='RS256')
-    if isinstance(token, bytes):
-      token = token.decode('utf8')
-    return token
-
-  def get_token(self, expiry_hours=1):
-    return self._get_token(expiry_hours)
-
-  def remove_non_ascii_chars(self, text):
-    normalized_text = unicodedata.normalize('NFD', text)
-    ascii_encoded_text = normalized_text.encode('ascii', 'ignore')
-    return ascii_encoded_text.decode()
-
-  def api_get(self, endpoint, method='GET', timeout=None, access_token=None, **params):
-    headers = {}
-    if access_token is not None:
-      headers['Authorization'] = "JWT " + access_token
-
-    version = self.remove_non_ascii_chars(get_version())
-    headers['User-Agent'] = self.user_agent + version
-
-    return requests.request(method, f"{self.api_host}/{endpoint}", timeout=timeout, headers=headers, params=params)
@@ -1,11 +0,0 @@
-import os
-
-from openpilot.common.api.base import BaseApi
-
-API_HOST = os.getenv('API_HOST', 'https://api.commadotai.com')
-
-
-class CommaConnectApi(BaseApi):
-  def __init__(self, dongle_id):
-    super().__init__(dongle_id, API_HOST)
-    self.user_agent = "openpilot-"
@@ -109,36 +109,36 @@ std::unordered_map<std::string, uint32_t> keys = {
    {"CurrentBootlog", PERSISTENT},
    {"CurrentRoute", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
    {"DisableLogging", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
-    {"DisablePowerDown", PERSISTENT | BACKUP},
-    {"DisableUpdates", PERSISTENT | BACKUP},
-    {"DisengageOnAccelerator", PERSISTENT | BACKUP},
+    {"DisablePowerDown", PERSISTENT},
+    {"DisableUpdates", PERSISTENT},
+    {"DisengageOnAccelerator", PERSISTENT},
    {"DongleId", PERSISTENT},
    {"DoReboot", CLEAR_ON_MANAGER_START},
    {"DoShutdown", CLEAR_ON_MANAGER_START},
    {"DoUninstall", CLEAR_ON_MANAGER_START},
-    {"ExperimentalLongitudinalEnabled", PERSISTENT | DEVELOPMENT_ONLY | BACKUP},
-    {"ExperimentalMode", PERSISTENT | BACKUP},
-    {"ExperimentalModeConfirmed", PERSISTENT | BACKUP},
+    {"ExperimentalLongitudinalEnabled", PERSISTENT | DEVELOPMENT_ONLY},
+    {"ExperimentalMode", PERSISTENT},
+    {"ExperimentalModeConfirmed", PERSISTENT},
    {"FirmwareQueryDone", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
    {"ForcePowerDown", PERSISTENT},
    {"GitBranch", PERSISTENT},
    {"GitCommit", PERSISTENT},
    {"GitCommitDate", PERSISTENT},
    {"GitDiff", PERSISTENT},
-    {"GithubSshKeys", PERSISTENT | BACKUP},
-    {"GithubUsername", PERSISTENT | BACKUP},
+    {"GithubSshKeys", PERSISTENT},
+    {"GithubUsername", PERSISTENT},
    {"GitRemote", PERSISTENT},
-    {"GsmApn", PERSISTENT | BACKUP},
-    {"GsmMetered", PERSISTENT | BACKUP},
-    {"GsmRoaming", PERSISTENT | BACKUP},
+    {"GsmApn", PERSISTENT},
+    {"GsmMetered", PERSISTENT},
+    {"GsmRoaming", PERSISTENT},
    {"HardwareSerial", PERSISTENT},
    {"HasAcceptedTerms", PERSISTENT},
    {"IMEI", PERSISTENT},
    {"InstallDate", PERSISTENT},
    {"IsDriverViewEnabled", CLEAR_ON_MANAGER_START},
    {"IsEngaged", PERSISTENT},
-    {"IsLdwEnabled", PERSISTENT | BACKUP},
-    {"IsMetric", PERSISTENT | BACKUP},
+    {"IsLdwEnabled", PERSISTENT},
+    {"IsMetric", PERSISTENT},
    {"IsOffroad", CLEAR_ON_MANAGER_START},
    {"IsOnroad", PERSISTENT},
    {"IsRhdDetected", PERSISTENT},
@@ -146,7 +146,7 @@ std::unordered_map<std::string, uint32_t> keys = {
    {"IsTakingSnapshot", CLEAR_ON_MANAGER_START},
    {"IsTestedBranch", CLEAR_ON_MANAGER_START},
    {"JoystickDebugMode", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION},
-    {"LanguageSetting", PERSISTENT | BACKUP},
+    {"LanguageSetting", PERSISTENT},
    {"LastAthenaPingTime", CLEAR_ON_MANAGER_START},
    {"LastGPSPosition", PERSISTENT},
    {"LastManagerExitReason", CLEAR_ON_MANAGER_START},
@@ -158,7 +158,7 @@ std::unordered_map<std::string, uint32_t> keys = {
    {"LiveTorqueParameters", PERSISTENT | DONT_LOG},
    {"LocationFilterInitialState", PERSISTENT},
    {"LongitudinalManeuverMode", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION},
-    {"LongitudinalPersonality", PERSISTENT | BACKUP},
+    {"LongitudinalPersonality", PERSISTENT},
    {"NetworkMetered", PERSISTENT},
    {"ObdMultiplexingChanged", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
    {"ObdMultiplexingEnabled", CLEAR_ON_MANAGER_START | CLEAR_ON_ONROAD_TRANSITION},
@@ -174,17 +174,17 @@ std::unordered_map<std::string, uint32_t> keys = {
    {"Offroad_TemperatureTooHigh", CLEAR_ON_MANAGER_START},
    {"Offroad_UnofficialHardware", CLEAR_ON_MANAGER_START},
    {"Offroad_UpdateFailed", CLEAR_ON_MANAGER_START},
-    {"OpenpilotEnabledToggle", PERSISTENT | BACKUP},
+    {"OpenpilotEnabledToggle", PERSISTENT},
    {"PandaHeartbeatLost", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION},
    {"PandaSomResetTriggered", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION},
    {"PandaSignatures", CLEAR_ON_MANAGER_START},
    {"PrimeType", PERSISTENT},
-    {"RecordFront", PERSISTENT | BACKUP},
+    {"RecordFront", PERSISTENT},
    {"RecordFrontLock", PERSISTENT},  // for the internal fleet
-    {"SecOCKey", PERSISTENT | DONT_LOG},  // Candidate for | BACKUP
+    {"SecOCKey", PERSISTENT | DONT_LOG},
    {"RouteCount", PERSISTENT},
    {"SnoozeUpdate", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION},
-    {"SshEnabled", PERSISTENT | BACKUP},
+    {"SshEnabled", PERSISTENT},
    {"TermsVersion", PERSISTENT},
    {"TrainingVersion", PERSISTENT},
    {"UbloxAvailable", PERSISTENT},
@@ -200,28 +200,7 @@ std::unordered_map<std::string, uint32_t> keys = {
    {"UpdaterTargetBranch", CLEAR_ON_MANAGER_START},
    {"UpdaterLastFetchTime", PERSISTENT},
    {"Version", PERSISTENT},
-
-    // --- sunnypilot params --- //
-    {"EnableGithubRunner", PERSISTENT | BACKUP},
-
-    // MADS params
-    {"Mads", PERSISTENT | BACKUP},
-    {"MadsMainCruiseAllowed", PERSISTENT | BACKUP},
-    {"MadsPauseLateralOnBrake", PERSISTENT | BACKUP},
-    {"MadsUnifiedEngagementMode", PERSISTENT | BACKUP},
-
-    // Model Manager params
-    {"ModelManager_ActiveBundle", PERSISTENT},
-    {"ModelManager_DownloadIndex", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION | CLEAR_ON_ONROAD_TRANSITION},
-    {"ModelManager_LastSyncTime", CLEAR_ON_MANAGER_START | CLEAR_ON_OFFROAD_TRANSITION},
-    {"ModelManager_ModelsCache", PERSISTENT | BACKUP},
-
-    // sunnylink params
-    {"EnableSunnylinkUploader", PERSISTENT | BACKUP},
-    {"LastSunnylinkPingTime", CLEAR_ON_MANAGER_START},
-    {"SunnylinkDongleId", PERSISTENT},
-    {"SunnylinkdPid", PERSISTENT},
-    {"SunnylinkEnabled", PERSISTENT},
+    {"EnableGithubRunner", PERSISTENT},
 };

 } // namespace
@@ -16,7 +16,6 @@ enum ParamKeyType {
  CLEAR_ON_OFFROAD_TRANSITION = 0x10,
  DONT_LOG = 0x20,
  DEVELOPMENT_ONLY = 0x40,
-  BACKUP = 0x80,
  ALL = 0xFFFFFFFF
 };

@@ -42,8 +42,7 @@ dependencies = [

  # modeld
  "onnx >= 1.14.0",
-  "onnxruntime >=1.16.3; platform_system == 'Linux' and platform_machine == 'aarch64'",
-  "onnxruntime-gpu >=1.16.3; platform_system == 'Linux' and platform_machine == 'x86_64'",
+  "onnxruntime >=1.16.3",

  # logging
  "pyzmq",
@@ -164,7 +163,6 @@ testpaths = [
  "tools/replay",
  "tools/cabana",
  "cereal/messaging/tests",
-  "sunnypilot",
 ]

 [tool.codespell]
@@ -1,361 +0,0 @@
-#!/usr/bin/env python3
-import argparse
-import subprocess
-import sys
-import shutil
-import signal
-import contextlib
-import tempfile
-import os
-
-
-def run_command(command: str) -> tuple[int, str, str]:
-  """Run a shell command and return exit code, stdout, and stderr."""
-  process = subprocess.Popen(
-    command,
-    shell=True,
-    stdout=subprocess.PIPE,
-    stderr=subprocess.PIPE,
-    text=True
-  )
-  stdout, stderr = process.communicate()
-  return process.returncode, stdout.strip(), stderr.strip()
-
-
-def is_gh_available() -> bool:
-  """Check if GitHub CLI is available."""
-  return shutil.which('gh') is not None
-
-
-def get_current_branch() -> str | None:
-  """Get the name of the current git branch."""
-  code, output, error = run_command("git rev-parse --abbrev-ref HEAD")
-  if code != 0:
-    print(f"Error getting current branch: {error}")
-    return None
-  return output
-
-
-def backup_branch(branch_name: str) -> bool:
-  """Create a backup of the current branch."""
-  backup_name = f"{branch_name}-backup-$(date +%Y%m%d_%H%M%S)"
-  code, _, error = run_command(f"git branch {backup_name}")
-  if code != 0:
-    print(f"Error creating backup branch: {error}")
-    return False
-  print(f"Created backup branch: {backup_name}")
-  return True
-
-
-def get_commit_messages(source_branch: str, target_branch: str) -> list[str] | None:
-  """Get all commit messages between source and target branches."""
-  code, output, error = run_command(f"git log {target_branch}..{source_branch} --format=%B")
-  if code != 0:
-    print(f"Error getting commit messages: {error}")
-    return None
-  return [msg.strip() for msg in output.splitlines() if msg and not msg.startswith('Merge')]
-
-
-def get_pr_info(branch_name: str) -> str | None:
-  """Get PR title using GitHub CLI."""
-  if not is_gh_available():
-    print("Warning: GitHub CLI not found. Install it to auto-fetch PR titles:")
-    print("  https://cli.github.com/")
-    return None
-
-  # Try to get PR info using gh cli
-  code, output, error = run_command(f"gh pr view --json title --jq .title {branch_name}")
-  if code != 0:
-    print(f"No open PR found for branch '{branch_name}'")
-    return None
-
-  return output
-
-
-def create_squash_message(pr_title: str | None, commit_messages: list[str], source_branch: str) -> str:
-  """Create a squash commit message from PR title and commit messages."""
-  parts = []
-
-  # Add PR title if provided
-  if pr_title:
-    parts.append(pr_title)
-  else:
-    parts.append(f"Squashed changes from {source_branch}")
-  parts.append("")  # Empty line after title
-
-  # Add original commits section
-  if commit_messages:
-    parts.append("Original commits:")
-    parts.append("")  # Empty line before list
-    parts.extend(f"* {msg}" for msg in commit_messages)
-
-  return '\n'.join(parts)
-
-
-def prompt_for_title() -> str:
-  """Prompt user for a commit title."""
-  return input("Enter commit title (or press Enter to use default): ").strip()
-
-
-@contextlib.contextmanager
-def workspace_manager(original_branch: str):
-  """Context manager to handle workspace state and cleanup."""
-  stash_created = False
-  stash_restored = False
-  temp_branch: str | None = None
-
-  def cleanup_handler(signum=None, frame=None):
-    """Clean up workspace state."""
-    nonlocal temp_branch, stash_created, stash_restored
-    try:
-      if signum and stash_restored:
-        # If we're handling Ctrl+C but stash was already restored,
-        # just clean up branches and exit
-        current = get_current_branch()
-        if current and current != original_branch:
-          run_command(f"git checkout {original_branch}")
-        if temp_branch:
-          run_command(f"git branch -D {temp_branch}")
-        print("\nOperation interrupted, but changes were already restored.")
-        sys.exit(1)
-
-      # First, switch back to original branch
-      current = get_current_branch()
-      if current and current != original_branch:
-        run_command(f"git checkout {original_branch}")
-
-      # Then clean up temp branch
-      if temp_branch:
-        run_command(f"git branch -D {temp_branch}")
-
-      # Finally, restore stash if needed - AFTER switching branches
-      if stash_created and not stash_restored:
-        print("Restoring your uncommitted changes...")
-        code, stash_list, _ = run_command("git stash list")
-        if code == 0 and "Automatic stash by squash script" in stash_list:
-          run_command("git stash pop")
-          stash_restored = True
-          stash_created = False
-
-      if signum:
-        print("\nOperation interrupted. Cleaned up and restored original state.")
-        sys.exit(1)
-
-    except Exception as e:
-      print(f"Error during cleanup: {e}")
-      if signum:
-        sys.exit(1)
-
-  try:
-    # Set up signal handlers
-    signal.signal(signal.SIGINT, cleanup_handler)
-    signal.signal(signal.SIGTERM, cleanup_handler)
-
-    # Check for changes (including untracked files)
-    code, output, _ = run_command("git status --porcelain")
-    if output:
-      print("Stashing uncommitted changes...")
-      run_command("git stash push -u -m 'Automatic stash by squash script'")
-      stash_created = True
-
-    yield lambda x: setattr(x, 'temp_branch', temp_branch)
-
-  except Exception as e:
-    print(f"\nError occurred: {str(e)}")
-    cleanup_handler()
-    raise
-  finally:
-    cleanup_handler()
-
-
-def create_commit_with_message(message: str) -> bool:
-  """Create a commit with the given message using a temporary file."""
-  try:
-    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
-      f.write(message)
-      temp_path = f.name
-
-    # Use the temporary file for the commit message
-    code, _, error = run_command(f"git commit -F {temp_path}")
-    os.unlink(temp_path)  # Clean up the temp file
-
-    if code != 0:
-      print(f"Error creating commit: {error}")
-      return False
-    return True
-  except Exception as e:
-    print(f"Error handling commit message: {e}")
-    if os.path.exists(temp_path):
-      os.unlink(temp_path)
-    return False
-
-
-def squash_and_merge(source_branch: str, target_branch: str, manual_title: str | None, backup: bool = False, push: bool = False) -> bool:
-  """
-  Squash the source branch and merge into target branch.
-  """
-  # Get original branch right away
-  original_branch = get_current_branch()
-  if not original_branch:
-    return False
-
-  class State:
-    temp_branch: str | None = None
-
-  state = State()
-
-  with workspace_manager(original_branch) as set_temp_branch:
-    # Validate source branch exists
-    code, _, error = run_command(f"git rev-parse --verify {source_branch}")
-    if code != 0:
-      print(f"Error: Source branch {source_branch} not found")
-      return False
-
-    if source_branch == target_branch:
-      print(f"Error: Source and target branches cannot be the same ({source_branch})")
-      return False
-
-    # Ensure target branch exists
-    code, _, error = run_command(f"git rev-parse --verify {target_branch}")
-    if code != 0:
-      print(f"Error: Target branch {target_branch} not found")
-      return False
-
-    # Find merge base
-    code, merge_base, error = run_command(f"git merge-base {target_branch} {source_branch}")
-    if code != 0:
-      print(f"Error finding merge base: {error}")
-      return False
-
-    # Create backup unless explicitly skipped
-    if backup and not backup_branch(source_branch):
-      return False
-
-    # Get commit messages
-    commit_messages = get_commit_messages(source_branch, target_branch)
-    if commit_messages is None:
-      return False
-
-    # Get title (priority: manual title > PR title > prompt user)
-    title = manual_title
-    if not title:
-      title = get_pr_info(source_branch)
-    if not title:
-      title = prompt_for_title()
-
-    try:
-      # Create and switch to temporary branch
-      temp_branch = f"temp-squash-{source_branch}"
-      state.temp_branch = temp_branch
-      set_temp_branch(state)
-
-      print(f"\nCreating temporary branch {temp_branch}...")
-      code, _, error = run_command(f"git checkout -b {temp_branch} {source_branch}")
-      if code != 0:
-        print(f"Error creating temp branch: {error}")
-        return False
-
-      print("Preparing squash by resetting temporary branch to merge base...")
-      code, _, error = run_command(f"git reset --soft {merge_base}")
-      if code != 0:
-        print(f"Error resetting for squash: {error}")
-        return False
-
-      # Create commit with message
-      print("Creating squash commit...")
-      squash_message = create_squash_message(title, commit_messages, source_branch)
-      if not create_commit_with_message(squash_message):
-        return False
-
-      # Switch to target and try merge
-      print(f"\nSwitching to target branch {target_branch}...")
-      code, _, error = run_command(f"git checkout {target_branch}")
-      if code != 0:
-        print(f"Error checking out target branch: {error}")
-        return False
-
-      print(f"Attempting to merge changes from {temp_branch}...")
-      code, _, error = run_command(f"git merge {temp_branch}")
-
-      if code != 0:
-        print(f"\nMerge failed with error: {error}")
-        print("\nThe squash was successful, and your changes are preserved in the temporary branch.")
-        print("To complete the merge manually, follow these steps:")
-        print(f"\n1. Your squashed changes are in branch: '{temp_branch}'")
-        print(f"2. The target branch is: '{target_branch}'")
-        print("\nTo resolve the conflicts:")
-        print(f"   git checkout {target_branch}")
-        print(f"   git merge {temp_branch}")
-        print("   # resolve conflicts in your editor")
-        print("   git add <resolved-files>")
-        print("   git commit")
-        print(f"   git push origin {target_branch}  # when ready to push")
-        print("\nTo clean up after successful merge:")
-        print(f"   git branch -D {temp_branch}")
-
-        # Make sure to abort the merge
-        print("\nAborting current merge attempt...")
-        run_command("git merge --abort")
-
-        # Return to original branch, but keep temp branch
-        print(f"Returning to {original_branch}...")
-        run_command(f"git checkout {original_branch}")
-        return False
-
-      # Clean up temp branch on success
-      run_command(f"git branch -D {temp_branch}")
-
-      # Push if requested
-      if push:
-        code, _, error = run_command(f"git push origin {target_branch}")
-        if code != 0:
-          print(f"Error pushing to {target_branch}: {error}")
-          return False
-        print(f"Successfully pushed to {target_branch}")
-      else:
-        print(f"Changes squashed and merged into {target_branch} locally")
-        print(f"To push the changes: git push origin {target_branch}")
-
-      # Return to original branch
-      code, _, error = run_command(f"git checkout {original_branch}")
-      if code != 0:
-        print(f"Warning: Failed to return to original branch: {error}")
-        return False
-
-      return True
-
-    except Exception as e:
-      print(f"Error during squash process: {e}")
-      return False
-
-
-def main():
-  parser = argparse.ArgumentParser(
-    description='Squash branch and merge into target branch'
-  )
-  parser.add_argument('--target', '-t', required=True,
-                      help='Target branch to merge changes into')
-  parser.add_argument('--source', '-s',
-                      help='Source branch to squash (default: current branch)')
-  parser.add_argument('--title', '-m',
-                      help='Optional manual title (overrides PR title)')
-  parser.add_argument('--backup', action='store_true',
-                      help='Creates a backup branch for the source branch')
-  parser.add_argument('--push', action='store_true',
-                      help='Push changes to remote after squashing')
-
-  args = parser.parse_args()
-
-  # Determine source branch early
-  source_branch = args.source
-  if not source_branch:
-    source_branch = get_current_branch()
-    if not source_branch:
-      sys.exit(1)
-
-  if not squash_and_merge(source_branch, args.target, args.title, args.backup, args.push):
-    sys.exit(1)
-
-
-if __name__ == "__main__":
-  main()
@@ -91,7 +91,7 @@ whitelist = [
  "tools/joystick/",
  "tools/longitudinal_maneuvers/",

-  "tinygrad_repo/openpilot/compile2.py",
+  "tinygrad_repo/examples/openpilot/compile3.py",
  "tinygrad_repo/extra/onnx.py",
  "tinygrad_repo/extra/onnx_ops.py",
  "tinygrad_repo/extra/thneed.py",
@@ -57,7 +57,7 @@ function run_tests() {

  if [[ -z "$FAST" ]]; then
    run "mypy" mypy $PYTHON_FILES
-    run "codespell" codespell $ALL_FILES --ignore-words=$ROOT/.codespellignore
+    run "codespell" codespell $ALL_FILES
  fi

  return $FAILED
@@ -22,8 +22,6 @@ from openpilot.selfdrive.pandad import can_capnp_to_list, can_list_to_can_capnp
 from openpilot.selfdrive.car.cruise import VCruiseHelper
 from openpilot.selfdrive.car.car_specific import MockCarState

-from openpilot.sunnypilot.mads.mads import MadsParams
-
 REPLAY = "REPLAY" in os.environ

 EventName = log.OnroadEvent.EventName
@@ -115,10 +113,6 @@ class Car:
    if not disengage_on_accelerator:
      self.CP.alternativeExperience |= ALTERNATIVE_EXPERIENCE.DISABLE_DISENGAGE_ON_GAS

-    # mads
-    MadsParams().set_alternative_experience(self.CP)
-    MadsParams().set_car_specific_params(self.CP)
-
    openpilot_enabled_toggle = self.params.get_bool("OpenpilotEnabledToggle")

    controller_available = self.CI.CC is not None and openpilot_enabled_toggle and not self.CP.dashcamOnly
@@ -19,7 +19,6 @@ from openpilot.selfdrive.controls.lib.longcontrol import LongControl
 from openpilot.selfdrive.controls.lib.vehicle_model import VehicleModel
 from openpilot.selfdrive.locationd.helpers import PoseCalibrator, Pose

-from opendbc.sunnypilot import SunnypilotParamFlags

 State = log.SelfdriveState.OpenpilotState
 LaneChangeState = log.LaneChangeState
@@ -57,9 +56,6 @@ class Controls:
    elif self.CP.lateralTuning.which() == 'torque':
      self.LaC = LatControlTorque(self.CP, self.CI)

-    data_services = list(self.sm.data.keys()) + ['selfdriveStateSP']
-    self.sm = messaging.SubMaster(data_services, poll='selfdriveState')
-
  def update(self):
    self.sm.update(15)
    if self.sm.updated["liveCalibration"]:
@@ -92,16 +88,7 @@ class Controls:

    # Check which actuators can be enabled
    standstill = abs(CS.vEgo) <= max(self.CP.minSteerSpeed, MIN_LATERAL_CONTROL_SPEED) or CS.standstill
-
-    ss_sp = self.sm['selfdriveStateSP']
-    CC.madsEnabled = ss_sp.mads.enabled
-    if ss_sp.mads.available:
-      CC.sunnypilotParams |= SunnypilotParamFlags.ENABLE_MADS.value
-      _lat_active = ss_sp.mads.active
-    else:
-      _lat_active = self.sm['selfdriveState'].active
-
-    CC.latActive = _lat_active and not CS.steerFaultTemporary and not CS.steerFaultPermanent and not standstill
+    CC.latActive = self.sm['selfdriveState'].active and not CS.steerFaultTemporary and not CS.steerFaultPermanent and not standstill
    CC.longActive = CC.enabled and not any(e.overrideLongitudinal for e in self.sm['onroadEvents']) and self.CP.openpilotLongitudinalControl

    actuators = CC.actuators
@@ -13,20 +13,6 @@ common_src = [
  "transforms/transform.cc",
 ]

-thneed_src_common = [
-  "thneed/thneed_common.cc",
-  "thneed/serialize.cc",
-]
-
-thneed_src_qcom = thneed_src_common + ["thneed/thneed_qcom2.cc"]
-thneed_src_pc = thneed_src_common + ["thneed/thneed_pc.cc"]
-thneed_src = thneed_src_qcom if arch == "larch64" else thneed_src_pc
-
-# SNPE except on Mac and ARM Linux
-snpe_lib = []
-if arch != "Darwin" and arch != "aarch64":
-  common_src += ['runners/snpemodel.cc']
-  snpe_lib += ['SNPE']

 # OpenCL is a framework on Mac
 if arch == "Darwin":
@@ -45,34 +31,24 @@ snpe_rpath_pc = f"{Dir('#').abspath}/third_party/snpe/x86_64-linux-clang"
 snpe_rpath = lenvCython['RPATH'] + [snpe_rpath_qcom if arch == "larch64" else snpe_rpath_pc]

 cython_libs = envCython["LIBS"] + libs
-snpemodel_lib = lenv.Library('snpemodel', ['runners/snpemodel.cc'])
 commonmodel_lib = lenv.Library('commonmodel', common_src)
-
-lenvCython.Program('runners/runmodel_pyx.so', 'runners/runmodel_pyx.pyx', LIBS=cython_libs, FRAMEWORKS=frameworks)
-lenvCython.Program('runners/snpemodel_pyx.so', 'runners/snpemodel_pyx.pyx', LIBS=[snpemodel_lib, snpe_lib, *cython_libs], FRAMEWORKS=frameworks, RPATH=snpe_rpath)
 lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks)
-
-tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath)]
+tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x]

 # Get model metadata
 fn = File("models/supercombo").abspath
 cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'
 lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files, cmd)

-# Build thneed model
-if arch == "larch64" or GetOption('pc_thneed'):
-  tinygrad_opts = []
-  if not GetOption('pc_thneed'):
-    # use FLOAT16 on device for speed + don't cache the CL kernels for space
-    tinygrad_opts += ["FLOAT16=1", "PYOPENCL_NO_CACHE=1"]
-  cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn}.onnx {fn}.thneed"
+# Compile tinygrad model
+pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
+if arch == 'larch64':
+  device_string = 'QCOM=1'
+else:
+  device_string = 'CLANG=1 IMAGE=0'

-  lenv.Command(fn + ".thneed", [fn + ".onnx"] + tinygrad_files, cmd)
+for model_name in ['supercombo', 'dmonitoring_model']:
+  fn = File(f"models/{model_name}").abspath
+  cmd = f'{pythonpath_string} {device_string} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
+  lenv.Command(fn + "_tinygrad.pkl", [fn + ".onnx"] + tinygrad_files, cmd)

-  fn_dm = File("models/dmonitoring_model").abspath
-  cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn_dm}.onnx {fn_dm}.thneed"
-  lenv.Command(fn_dm + ".thneed", [fn_dm + ".onnx"] + tinygrad_files, cmd)
-
-  thneed_lib = env.SharedLibrary('thneed', thneed_src, LIBS=[gpucommon, common, 'OpenCL', 'dl'])
-  thneedmodel_lib = env.Library('thneedmodel', ['runners/thneedmodel.cc'])
-  lenvCython.Program('runners/thneedmodel_pyx.so', 'runners/thneedmodel_pyx.pyx', LIBS=envCython["LIBS"]+[thneedmodel_lib, thneed_lib, gpucommon, common, 'dl', 'OpenCL'])
@@ -1,10 +1,4 @@
 #!/usr/bin/env bash

 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
-cd "$DIR/../../"
-
-if [ -f "$DIR/libthneed.so" ]; then
-  export LD_PRELOAD="$DIR/libthneed.so"
-fi
-
 exec "$DIR/dmonitoringmodeld.py" "$@"
@@ -1,8 +1,17 @@
 #!/usr/bin/env python3
 import os
+from openpilot.system.hardware import TICI
+if TICI:
+  from tinygrad.tensor import Tensor
+  from tinygrad.dtype import dtypes
+  from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
+  os.environ['QCOM'] = '1'
+else:
+  from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner
 import gc
 import math
 import time
+import pickle
 import ctypes
 import numpy as np
 from pathlib import Path
@@ -13,21 +22,20 @@ from cereal.messaging import PubMaster, SubMaster
 from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
 from openpilot.common.swaglog import cloudlog
 from openpilot.common.realtime import set_realtime_priority
-from openpilot.common.transformations.model import dmonitoringmodel_intrinsics
+from openpilot.common.transformations.model import dmonitoringmodel_intrinsics, DM_INPUT_SIZE
 from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
 from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame
-from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime
 from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid

+MODEL_WIDTH, MODEL_HEIGHT = DM_INPUT_SIZE
 CALIB_LEN = 3
 FEATURE_LEN = 512
 OUTPUT_SIZE = 84 + FEATURE_LEN

 PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
 SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
-MODEL_PATHS = {
-  ModelRunner.THNEED: Path(__file__).parent / 'models/dmonitoring_model.thneed',
-  ModelRunner.ONNX: Path(__file__).parent / 'models/dmonitoring_model.onnx'}
+MODEL_PATH = Path(__file__).parent / 'models/dmonitoring_model.onnx'
+MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl'

 class DriverStateResult(ctypes.Structure):
  _fields_ = [
@@ -58,29 +66,42 @@ class DMonitoringModelResult(ctypes.Structure):
 class ModelState:
  inputs: dict[str, np.ndarray]
  output: np.ndarray
-  model: ModelRunner

  def __init__(self, cl_ctx):
    assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float)

    self.frame = MonitoringModelFrame(cl_ctx)
-    self.output = np.zeros(OUTPUT_SIZE, dtype=np.float32)
-    self.inputs = {
-      'calib': np.zeros(CALIB_LEN, dtype=np.float32)}
+    self.numpy_inputs = {
+      'calib': np.zeros((1, CALIB_LEN), dtype=np.float32),
+    }

-    self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, cl_ctx)
-    self.model.addInput("input_img", None)
-    self.model.addInput("calib", self.inputs['calib'])
+    if TICI:
+      self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
+      with open(MODEL_PKL_PATH, "rb") as f:
+        self.model_run = pickle.load(f)
+    else:
+      self.onnx_cpu_runner = make_onnx_cpu_runner(MODEL_PATH)

  def run(self, buf:VisionBuf, calib:np.ndarray, transform:np.ndarray) -> tuple[np.ndarray, float]:
-    self.inputs['calib'][:] = calib
-
-    self.model.setInputBuffer("input_img", self.frame.prepare(buf, transform.flatten(), None).view(np.float32))
+    self.numpy_inputs['calib'][0,:] = calib

    t1 = time.perf_counter()
-    self.model.execute()
+
+    input_img_cl = self.frame.prepare(buf, transform.flatten())
+    if TICI:
+      # The imgs tensors are backed by opencl memory, only need init once
+      if 'input_img' not in self.tensor_inputs:
+        self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, (1, MODEL_WIDTH*MODEL_HEIGHT), dtype=dtypes.uint8)
+    else:
+      self.numpy_inputs['input_img'] = self.frame.buffer_from_cl(input_img_cl).reshape((1, MODEL_WIDTH*MODEL_HEIGHT))
+
+    if TICI:
+      output = self.model_run(**self.tensor_inputs).numpy().flatten()
+    else:
+      output = self.onnx_cpu_runner.run(None, self.numpy_inputs)[0].flatten()
+
    t2 = time.perf_counter()
-    return self.output, t2 - t1
+    return output, t2 - t1


 def fill_driver_state(msg, ds_result: DriverStateResult):
@@ -1,10 +1,4 @@
 #!/usr/bin/env bash

 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
-cd "$DIR/../../"
-
-if [ -f "$DIR/libthneed.so" ]; then
-  export LD_PRELOAD="$DIR/libthneed.so"
-fi
-
 exec "$DIR/modeld.py" "$@"
@@ -1,11 +1,13 @@
 #!/usr/bin/env python3
-import os
+from openpilot.system.hardware import TICI
+
+from openpilot.selfdrive.modeld.runners.model_runner import ONNXRunner, TinygradRunner
+
+#
 import time
-import pickle
 import numpy as np
 import cereal.messaging as messaging
 from cereal import car, log
-from pathlib import Path
 from setproctitle import setproctitle
 from cereal.messaging import PubMaster, SubMaster
 from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
@@ -18,20 +20,12 @@ from openpilot.common.transformations.camera import DEVICE_CAMERAS
 from openpilot.common.transformations.model import get_warp_matrix
 from openpilot.system import sentry
 from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
-from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime
 from openpilot.selfdrive.modeld.parse_model_outputs import Parser
 from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
 from openpilot.selfdrive.modeld.constants import ModelConstants
 from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext

 PROCESS_NAME = "selfdrive.modeld.modeld"
-SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
-
-MODEL_PATHS = {
-  ModelRunner.THNEED: Path(__file__).parent / 'models/supercombo.thneed',
-  ModelRunner.ONNX: Path(__file__).parent / 'models/supercombo.onnx'}
-
-METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'


 class FrameMeta:
@@ -44,46 +38,30 @@ class FrameMeta:
      self.frame_id, self.timestamp_sof, self.timestamp_eof = vipc.frame_id, vipc.timestamp_sof, vipc.timestamp_eof

 class ModelState:
-  frame: DrivingModelFrame
-  wide_frame: DrivingModelFrame
+  frames: dict[str, DrivingModelFrame]
  inputs: dict[str, np.ndarray]
  output: np.ndarray
  prev_desire: np.ndarray  # for tracking the rising edge of the pulse
-  model: ModelRunner

  def __init__(self, context: CLContext):
-    self.frame = DrivingModelFrame(context)
-    self.wide_frame = DrivingModelFrame(context)
+    self.frames = {'input_imgs': DrivingModelFrame(context), 'big_input_imgs': DrivingModelFrame(context)}
    self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
    self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
    self.desire_20Hz =  np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)

    # img buffers are managed in openCL transform code
-    self.inputs = {
-      'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
-      'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
-      'features_buffer': np.zeros(ModelConstants.HISTORY_BUFFER_LEN * ModelConstants.FEATURE_LEN, dtype=np.float32),
+    self.numpy_inputs = {
+      'desire': np.zeros((1, (ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float32),
+      'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float32),
+      'features_buffer': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN,  ModelConstants.FEATURE_LEN), dtype=np.float32),
    }

-    with open(METADATA_PATH, 'rb') as f:
-      model_metadata = pickle.load(f)
-
-    self.output_slices = model_metadata['output_slices']
-    net_output_size = model_metadata['output_shapes']['outputs'][1]
-    self.output = np.zeros(net_output_size, dtype=np.float32)
+    # Initialize model runner
+    self.model_runner = TinygradRunner() if TICI else ONNXRunner(self.frames)
    self.parser = Parser()

-    self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, context)
-    self.model.addInput("input_imgs", None)
-    self.model.addInput("big_input_imgs", None)
-    for k,v in self.inputs.items():
-      self.model.addInput(k, v)
-
-  def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
-    parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
-    if SEND_RAW_PRED:
-      parsed_model_outputs['raw_pred'] = model_outputs.copy()
-    return parsed_model_outputs
+    net_output_size = self.model_runner.model_metadata['output_shapes']['outputs'][1]
+    self.output = np.zeros(net_output_size, dtype=np.float32)

  def run(self, buf: VisionBuf, wbuf: VisionBuf, transform: np.ndarray, transform_wide: np.ndarray,
                inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
@@ -94,24 +72,27 @@ class ModelState:

    self.desire_20Hz[:-1] = self.desire_20Hz[1:]
    self.desire_20Hz[-1] = new_desire
-    self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten()
+    self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape((1,25,4,-1)).max(axis=2)

-    self.inputs['traffic_convention'][:] = inputs['traffic_convention']
+    self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
+    imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
+               'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}

-    self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs")))
-    self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))
+    # Prepare inputs using the model runner
+    self.model_runner.prepare_inputs(imgs_cl, self.numpy_inputs)

    if prepare_only:
      return None

-    self.model.execute()
-    outputs = self.parser.parse_outputs(self.slice_outputs(self.output))
+    # Run model inference
+    self.output = self.model_runner.run_model()
+    outputs = self.parser.parse_outputs(self.model_runner.slice_outputs(self.output))

    self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
    self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]

    idxs = np.arange(-4,-100,-4)[::-1]
-    self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten()
+    self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[idxs]
    return outputs


@@ -172,7 +153,6 @@ def main(demo=False):
  meta_main = FrameMeta()
  meta_extra = FrameMeta()

-
  if demo:
    CP = get_demo_car_params()
  else:
@@ -281,7 +261,6 @@ def main(demo=False):
      pm.send('modelV2', modelv2_send)
      pm.send('drivingModelData', drivingdata_send)
      pm.send('cameraOdometry', posenet_send)
-
    last_vipc_frame_id = meta_main.frame_id


@@ -7,7 +7,7 @@

 DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
  input_frames = std::make_unique<uint8_t[]>(buf_size);
-  //input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
+  input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
  img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
  region.origin = 4 * frame_size_bytes;
  region.size = frame_size_bytes;
@@ -17,7 +17,7 @@ DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context)
  init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
 }

-uint8_t* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
+cl_mem* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
  run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);

  for (int i = 0; i < 4; i++) {
@@ -25,19 +25,12 @@ uint8_t* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_he
  }
  loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);

-  if (output == NULL) {
-    CL_CHECK(clEnqueueReadBuffer(q, img_buffer_20hz_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[0], 0, nullptr, nullptr));
-    CL_CHECK(clEnqueueReadBuffer(q, last_img_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
-    clFinish(q);
-    return &input_frames[0];
-  } else {
-    copy_queue(&loadyuv, q, img_buffer_20hz_cl, *output, 0, 0, frame_size_bytes);
-    copy_queue(&loadyuv, q, last_img_cl, *output, 0, frame_size_bytes, frame_size_bytes);
+  copy_queue(&loadyuv, q, img_buffer_20hz_cl, input_frames_cl, 0, 0, frame_size_bytes);
+  copy_queue(&loadyuv, q, last_img_cl, input_frames_cl, 0, frame_size_bytes, frame_size_bytes);

-    // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
-    clFinish(q);
-    return NULL;
-  }
+  // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
+  clFinish(q);
+  return &input_frames_cl;
 }

 DrivingModelFrame::~DrivingModelFrame() {
@@ -51,16 +44,15 @@ DrivingModelFrame::~DrivingModelFrame() {

 MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
  input_frames = std::make_unique<uint8_t[]>(buf_size);
-  //input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
+  input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));

  init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
 }
-uint8_t* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) {
+
+cl_mem* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
  run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
-  CL_CHECK(clEnqueueReadBuffer(q, y_cl, CL_TRUE, 0, MODEL_FRAME_SIZE * sizeof(uint8_t), input_frames.get(), 0, nullptr, nullptr));
  clFinish(q);
-  //return &y_cl;
-  return input_frames.get();
+  return &y_cl;
 }

 MonitoringModelFrame::~MonitoringModelFrame() {
@@ -23,14 +23,12 @@ public:
    q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
  }
  virtual ~ModelFrame() {}
-  virtual uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output) { return NULL; }
-  /*
+  virtual cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { return NULL; }
  uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
    CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));
    clFinish(q);
    return &input_frames[0];
  }
-  */

  int MODEL_WIDTH;
  int MODEL_HEIGHT;
@@ -68,7 +66,7 @@ class DrivingModelFrame : public ModelFrame {
 public:
  DrivingModelFrame(cl_device_id device_id, cl_context context);
  ~DrivingModelFrame();
-  uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
+  cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection);

  const int MODEL_WIDTH = 512;
  const int MODEL_HEIGHT = 256;
@@ -78,7 +76,7 @@ public:

 private:
  LoadYUVState loadyuv;
-  cl_mem img_buffer_20hz_cl, last_img_cl;//, input_frames_cl;
+  cl_mem img_buffer_20hz_cl, last_img_cl, input_frames_cl;
  cl_buffer_region region;
 };

@@ -86,7 +84,7 @@ class MonitoringModelFrame : public ModelFrame {
 public:
  MonitoringModelFrame(cl_device_id device_id, cl_context context);
  ~MonitoringModelFrame();
-  uint8_t* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection, cl_mem* output);
+  cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection);

  const int MODEL_WIDTH = 1440;
  const int MODEL_HEIGHT = 960;
@@ -94,5 +92,5 @@ public:
  const int buf_size = MODEL_FRAME_SIZE;

 private:
-  // cl_mem input_frame_cl;
+  cl_mem input_frame_cl;
 };
@@ -14,8 +14,8 @@ cdef extern from "common/clutil.h":
 cdef extern from "selfdrive/modeld/models/commonmodel.h":
  cppclass ModelFrame:
    int buf_size
-    # unsigned char * buffer_from_cl(cl_mem*, int);
-    unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
+    unsigned char * buffer_from_cl(cl_mem*, int);
+    cl_mem * prepare(cl_mem, int, int, int, int, mat3)

  cppclass DrivingModelFrame:
    int buf_size
@@ -39,24 +39,17 @@ cdef class ModelFrame:
  def __dealloc__(self):
    del self.frame

-  def prepare(self, VisionBuf buf, float[:] projection, CLMem output):
+  def prepare(self, VisionBuf buf, float[:] projection):
    cdef mat3 cprojection
    memcpy(cprojection.v, &projection[0], 9*sizeof(float))
-    cdef unsigned char * data
-    if output is None:
-      data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, NULL)
-    else:
-      data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem)
-    if not data:
-      return None
+    cdef cl_mem * data
+    data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection)
+    return CLMem.create(data)

-    return np.asarray(<cnp.uint8_t[:self.buf_size]> data)
-    # return CLMem.create(data)
-
-  # def buffer_from_cl(self, CLMem in_frames):
-  #   cdef unsigned char * data2
-  #   data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size)
-  #   return np.asarray(<cnp.uint8_t[:self.buf_size]> data2)
+  def buffer_from_cl(self, CLMem in_frames):
+    cdef unsigned char * data2
+    data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size)
+    return np.asarray(<cnp.uint8_t[:self.buf_size]> data2)


 cdef class DrivingModelFrame(ModelFrame):
@@ -74,3 +67,4 @@ cdef class MonitoringModelFrame(ModelFrame):
    self._frame = new cppMonitoringModelFrame(context.device_id, context.context)
    self.frame = <cppModelFrame*>(self._frame)
    self.buf_size = self._frame.buf_size
+
@@ -1,27 +0,0 @@
-import os
-from openpilot.system.hardware import TICI
-from openpilot.selfdrive.modeld.runners.runmodel_pyx import RunModel, Runtime
-assert Runtime
-
-USE_THNEED = int(os.getenv('USE_THNEED', str(int(TICI))))
-USE_SNPE = int(os.getenv('USE_SNPE', str(int(TICI))))
-
-class ModelRunner(RunModel):
-  THNEED = 'THNEED'
-  SNPE = 'SNPE'
-  ONNX = 'ONNX'
-
-  def __new__(cls, paths, *args, **kwargs):
-    if ModelRunner.THNEED in paths and USE_THNEED:
-      from openpilot.selfdrive.modeld.runners.thneedmodel_pyx import ThneedModel as Runner
-      runner_type = ModelRunner.THNEED
-    elif ModelRunner.SNPE in paths and USE_SNPE:
-      from openpilot.selfdrive.modeld.runners.snpemodel_pyx import SNPEModel as Runner
-      runner_type = ModelRunner.SNPE
-    elif ModelRunner.ONNX in paths:
-      from openpilot.selfdrive.modeld.runners.onnxmodel import ONNXModel as Runner
-      runner_type = ModelRunner.ONNX
-    else:
-      raise Exception("Couldn't select a model runner, make sure to pass at least one valid model path")
-
-    return Runner(str(paths[runner_type]), *args, **kwargs)
@@ -0,0 +1,92 @@
+import os
+from openpilot.system.hardware import TICI
+
+#
+if TICI:
+  from tinygrad.tensor import Tensor
+  from tinygrad.dtype import dtypes
+  from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
+  os.environ['QCOM'] = '1'
+else:
+  from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner
+import pickle
+import numpy as np
+from pathlib import Path
+from abc import ABC, abstractmethod
+from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLMem
+
+SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
+MODEL_PATH = Path(__file__).parent / '../models/supercombo.onnx'
+MODEL_PKL_PATH = Path(__file__).parent / '../models/supercombo_tinygrad.pkl'
+METADATA_PATH = Path(__file__).parent / '../models/supercombo_metadata.pkl'
+
+
+class ModelRunner(ABC):
+  """Abstract base class for model runners that defines the interface for running ML models."""
+
+  def __init__(self):
+    """Initialize the model runner with paths to model and metadata files."""
+    with open(METADATA_PATH, 'rb') as f:
+      self.model_metadata = pickle.load(f)
+    self.input_shapes = self.model_metadata['input_shapes']
+    self.output_slices = self.model_metadata['output_slices']
+    self.inputs: dict = {}
+
+  @abstractmethod
+  def prepare_inputs(self, imgs_cl: dict[str, CLMem], numpy_inputs: dict[str, np.ndarray])-> dict:
+    """Prepare inputs for model inference."""
+
+  @abstractmethod
+  def run_model(self):
+    """Run model inference with prepared inputs."""
+
+  def slice_outputs(self, model_outputs: np.ndarray) -> dict:
+    """Slice model outputs according to metadata configuration."""
+    parsed_outputs = {k: model_outputs[np.newaxis, v] for k, v in self.output_slices.items()}
+    if SEND_RAW_PRED:
+      parsed_outputs['raw_pred'] = model_outputs.copy()
+    return parsed_outputs
+
+
+class TinygradRunner(ModelRunner):
+  """Tinygrad implementation of model runner for TICI hardware."""
+
+  def __init__(self):
+    super().__init__()
+    # Load Tinygrad model
+    with open(MODEL_PKL_PATH, "rb") as f:
+      self.model_run = pickle.load(f)
+
+  def prepare_inputs(self, imgs_cl: dict[str, CLMem], numpy_inputs: dict[str, np.ndarray]) -> dict:
+    # Initialize image tensors if not already done
+    for key in imgs_cl:
+      if key not in self.inputs:
+        self.inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtypes.uint8)
+
+    # Update numpy inputs
+    for k, v in numpy_inputs.items():
+      if k not in self.inputs:
+        self.inputs[k] = Tensor(v, device='NPY').realize()
+
+    return self.inputs
+
+  def run_model(self):
+    return self.model_run(**self.inputs).numpy().flatten()
+
+
+class ONNXRunner(ModelRunner):
+  """ONNX implementation of model runner for non-TICI hardware."""
+
+  def __init__(self, frames: dict[str, DrivingModelFrame]):
+    super().__init__()
+    self.runner = make_onnx_cpu_runner(MODEL_PATH)
+    self.frames = frames
+
+  def prepare_inputs(self, imgs_cl: dict[str, CLMem], numpy_inputs: dict[str, np.ndarray]) -> dict:
+    self.inputs = numpy_inputs.copy()
+    for key in imgs_cl:
+      self.inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])
+    return self.inputs
+
+  def run_model(self):
+    return self.runner.run(None, self.inputs)[0].flatten()
@@ -1,71 +0,0 @@
-import os
-import onnx
-import sys
-import numpy as np
-from typing import Any
-
-from openpilot.selfdrive.modeld.runners.runmodel_pyx import RunModel
-from openpilot.selfdrive.modeld.runners.ort_helpers import convert_fp16_to_fp32, ORT_TYPES_TO_NP_TYPES
-
-
-def create_ort_session(path, fp16_to_fp32):
-  os.environ["OMP_NUM_THREADS"] = "4"
-  os.environ["OMP_WAIT_POLICY"] = "PASSIVE"
-
-  import onnxruntime as ort
-  print("Onnx available providers: ", ort.get_available_providers(), file=sys.stderr)
-  options = ort.SessionOptions()
-  options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
-
-  provider: str | tuple[str, dict[Any, Any]]
-  if 'OpenVINOExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
-    provider = 'OpenVINOExecutionProvider'
-  elif 'CUDAExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
-    options.intra_op_num_threads = 2
-    provider = ('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'EXHAUSTIVE'})
-  else:
-    options.intra_op_num_threads = 2
-    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
-    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-    provider = 'CPUExecutionProvider'
-
-  model_data = convert_fp16_to_fp32(onnx.load(path)) if fp16_to_fp32 else path
-  print("Onnx selected provider: ", [provider], file=sys.stderr)
-  ort_session = ort.InferenceSession(model_data, options, providers=[provider])
-  print("Onnx using ", ort_session.get_providers(), file=sys.stderr)
-  return ort_session
-
-
-class ONNXModel(RunModel):
-  def __init__(self, path, output, runtime, use_tf8, cl_context):
-    self.inputs = {}
-    self.output = output
-
-    self.session = create_ort_session(path, fp16_to_fp32=True)
-    self.input_names = [x.name for x in self.session.get_inputs()]
-    self.input_shapes = {x.name: [1, *x.shape[1:]] for x in self.session.get_inputs()}
-    self.input_dtypes = {x.name: ORT_TYPES_TO_NP_TYPES[x.type] for x in self.session.get_inputs()}
-
-    # run once to initialize CUDA provider
-    if "CUDAExecutionProvider" in self.session.get_providers():
-      self.session.run(None, {k: np.zeros(self.input_shapes[k], dtype=self.input_dtypes[k]) for k in self.input_names})
-    print("ready to run onnx model", self.input_shapes, file=sys.stderr)
-
-  def addInput(self, name, buffer):
-    assert name in self.input_names
-    self.inputs[name] = buffer
-
-  def setInputBuffer(self, name, buffer):
-    assert name in self.inputs
-    self.inputs[name] = buffer
-
-  def getCLBuffer(self, name):
-    return None
-
-  def execute(self):
-    inputs = {k: v.view(self.input_dtypes[k]) for k,v in self.inputs.items()}
-    inputs = {k: v.reshape(self.input_shapes[k]).astype(self.input_dtypes[k]) for k,v in inputs.items()}
-    outputs = self.session.run(None, inputs)
-    assert len(outputs) == 1, "Only single model outputs are supported"
-    self.output[:] = outputs[0]
-    return self.output
@@ -1,4 +0,0 @@
-#pragma once
-
-#include "selfdrive/modeld/runners/runmodel.h"
-#include "selfdrive/modeld/runners/snpemodel.h"
@@ -1,49 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-#include <memory>
-#include <cassert>
-
-#include "common/clutil.h"
-#include "common/swaglog.h"
-
-#define USE_CPU_RUNTIME 0
-#define USE_GPU_RUNTIME 1
-#define USE_DSP_RUNTIME 2
-
-struct ModelInput {
-  const std::string name;
-  float *buffer;
-  int size;
-
-  ModelInput(const std::string _name, float *_buffer, int _size) : name(_name), buffer(_buffer), size(_size) {}
-  virtual void setBuffer(float *_buffer, int _size) {
-    assert(size == _size || size == 0);
-    buffer = _buffer;
-    size = _size;
-  }
-};
-
-class RunModel {
-public:
-  std::vector<std::unique_ptr<ModelInput>> inputs;
-
-  virtual ~RunModel() {}
-  virtual void execute() {}
-  virtual void* getCLBuffer(const std::string name) { return nullptr; }
-
-  virtual void addInput(const std::string name, float *buffer, int size) {
-    inputs.push_back(std::unique_ptr<ModelInput>(new ModelInput(name, buffer, size)));
-  }
-  virtual void setInputBuffer(const std::string name, float *buffer, int size) {
-    for (auto &input : inputs) {
-      if (name == input->name) {
-        input->setBuffer(buffer, size);
-        return;
-      }
-    }
-    LOGE("Tried to update input `%s` but no input with this name exists", name.c_str());
-    assert(false);
-  }
-};
@@ -1,14 +0,0 @@
-# distutils: language = c++
-
-from libcpp.string cimport string
-
-cdef extern from "selfdrive/modeld/runners/runmodel.h":
-  cdef int USE_CPU_RUNTIME
-  cdef int USE_GPU_RUNTIME
-  cdef int USE_DSP_RUNTIME
-
-  cdef cppclass RunModel:
-    void addInput(string, float*, int)
-    void setInputBuffer(string, float*, int)
-    void * getCLBuffer(string)
-    void execute()
@@ -1,6 +0,0 @@
-# distutils: language = c++
-
-from .runmodel cimport RunModel as cppRunModel
-
-cdef class RunModel:
-  cdef cppRunModel * model
@@ -1,37 +0,0 @@
-# distutils: language = c++
-# cython: c_string_encoding=ascii, language_level=3
-
-from libcpp.string cimport string
-
-from .runmodel cimport USE_CPU_RUNTIME, USE_GPU_RUNTIME, USE_DSP_RUNTIME
-from selfdrive.modeld.models.commonmodel_pyx cimport CLMem
-
-class Runtime:
-  CPU = USE_CPU_RUNTIME
-  GPU = USE_GPU_RUNTIME
-  DSP = USE_DSP_RUNTIME
-
-cdef class RunModel:
-  def __dealloc__(self):
-    del self.model
-
-  def addInput(self, string name, float[:] buffer):
-    if buffer is not None:
-      self.model.addInput(name, &buffer[0], len(buffer))
-    else:
-      self.model.addInput(name, NULL, 0)
-
-  def setInputBuffer(self, string name, float[:] buffer):
-    if buffer is not None:
-      self.model.setInputBuffer(name, &buffer[0], len(buffer))
-    else:
-      self.model.setInputBuffer(name, NULL, 0)
-
-  def getCLBuffer(self, string name):
-    cdef void * cl_buf = self.model.getCLBuffer(name)
-    if not cl_buf:
-      return None
-    return CLMem.create(cl_buf)
-
-  def execute(self):
-    self.model.execute()
@@ -1,116 +0,0 @@
-#pragma clang diagnostic ignored "-Wexceptions"
-
-#include "selfdrive/modeld/runners/snpemodel.h"
-
-#include <cstring>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "common/util.h"
-#include "common/timing.h"
-
-void PrintErrorStringAndExit() {
-  std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
-  std::exit(EXIT_FAILURE);
-}
-
-SNPEModel::SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) {
-  output = _output;
-  output_size = _output_size;
-  use_tf8 = _use_tf8;
-
-#ifdef QCOM2
-  if (runtime == USE_GPU_RUNTIME) {
-    snpe_runtime = zdl::DlSystem::Runtime_t::GPU;
-  } else if (runtime == USE_DSP_RUNTIME) {
-    snpe_runtime = zdl::DlSystem::Runtime_t::DSP;
-  } else {
-    snpe_runtime = zdl::DlSystem::Runtime_t::CPU;
-  }
-  assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(snpe_runtime));
-#endif
-  model_data = util::read_file(path);
-  assert(model_data.size() > 0);
-
-  // load model
-  std::unique_ptr<zdl::DlContainer::IDlContainer> container = zdl::DlContainer::IDlContainer::open((uint8_t*)model_data.data(), model_data.size());
-  if (!container) { PrintErrorStringAndExit(); }
-  LOGW("loaded model with size: %lu", model_data.size());
-
-  // create model runner
-  zdl::SNPE::SNPEBuilder snpe_builder(container.get());
-  while (!snpe) {
-#ifdef QCOM2
-    snpe = snpe_builder.setOutputLayers({})
-                       .setRuntimeProcessor(snpe_runtime)
-                       .setUseUserSuppliedBuffers(true)
-                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
-                       .build();
-#else
-    snpe = snpe_builder.setOutputLayers({})
-                       .setUseUserSuppliedBuffers(true)
-                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
-                       .build();
-#endif
-    if (!snpe) std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
-  }
-
-  // create output buffer
-  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
-  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
-
-  const auto &output_tensor_names_opt = snpe->getOutputTensorNames();
-  if (!output_tensor_names_opt) throw std::runtime_error("Error obtaining output tensor names");
-  const auto &output_tensor_names = *output_tensor_names_opt;
-  assert(output_tensor_names.size() == 1);
-  const char *output_tensor_name = output_tensor_names.at(0);
-  const zdl::DlSystem::TensorShape &buffer_shape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims();
-  if (output_size != 0) {
-    assert(output_size == buffer_shape[1]);
-  } else {
-    output_size = buffer_shape[1];
-  }
-  std::vector<size_t> output_strides = {output_size * sizeof(float), sizeof(float)};
-  output_buffer = ub_factory.createUserBuffer(output, output_size * sizeof(float), output_strides, &ub_encoding_float);
-  output_map.add(output_tensor_name, output_buffer.get());
-}
-
-void SNPEModel::addInput(const std::string name, float *buffer, int size) {
-  const int idx = inputs.size();
-  const auto &input_tensor_names_opt = snpe->getInputTensorNames();
-  if (!input_tensor_names_opt) throw std::runtime_error("Error obtaining input tensor names");
-  const auto &input_tensor_names = *input_tensor_names_opt;
-  const char *input_tensor_name = input_tensor_names.at(idx);
-  const bool input_tf8 = use_tf8 && strcmp(input_tensor_name, "input_img") == 0;  // TODO: This is a terrible hack, get rid of this name check both here and in onnx_runner.py
-  LOGW("adding index %d: %s", idx, input_tensor_name);
-
-  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
-  zdl::DlSystem::UserBufferEncodingTf8 ub_encoding_tf8(0, 1./255); // network takes 0-1
-  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
-  zdl::DlSystem::UserBufferEncoding *input_encoding = input_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_tf8 : (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_float;
-
-  const auto &buffer_shape_opt = snpe->getInputDimensions(input_tensor_name);
-  const zdl::DlSystem::TensorShape &buffer_shape = *buffer_shape_opt;
-  size_t size_of_input = input_tf8 ? sizeof(uint8_t) : sizeof(float);
-  std::vector<size_t> strides(buffer_shape.rank());
-  strides[strides.size() - 1] = size_of_input;
-  size_t product = 1;
-  for (size_t i = 0; i < buffer_shape.rank(); i++) product *= buffer_shape[i];
-  size_t stride = strides[strides.size() - 1];
-  for (size_t i = buffer_shape.rank() - 1; i > 0; i--) {
-    stride *= buffer_shape[i];
-    strides[i-1] = stride;
-  }
-
-  auto input_buffer = ub_factory.createUserBuffer(buffer, product*size_of_input, strides, input_encoding);
-  input_map.add(input_tensor_name, input_buffer.get());
-  inputs.push_back(std::unique_ptr<SNPEModelInput>(new SNPEModelInput(name, buffer, size, std::move(input_buffer))));
-}
-
-void SNPEModel::execute() {
-  if (!snpe->execute(input_map, output_map)) {
-    PrintErrorStringAndExit();
-  }
-}
@@ -1,52 +0,0 @@
-#pragma once
-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
-
-#include <memory>
-#include <string>
-#include <utility>
-
-#include <DlContainer/IDlContainer.hpp>
-#include <DlSystem/DlError.hpp>
-#include <DlSystem/ITensor.hpp>
-#include <DlSystem/ITensorFactory.hpp>
-#include <DlSystem/IUserBuffer.hpp>
-#include <DlSystem/IUserBufferFactory.hpp>
-#include <SNPE/SNPE.hpp>
-#include <SNPE/SNPEBuilder.hpp>
-#include <SNPE/SNPEFactory.hpp>
-
-#include "selfdrive/modeld/runners/runmodel.h"
-
-struct SNPEModelInput : public ModelInput {
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> snpe_buffer;
-
-  SNPEModelInput(const std::string _name, float *_buffer, int _size, std::unique_ptr<zdl::DlSystem::IUserBuffer> _snpe_buffer) : ModelInput(_name, _buffer, _size), snpe_buffer(std::move(_snpe_buffer)) {}
-  void setBuffer(float *_buffer, int _size) {
-    ModelInput::setBuffer(_buffer, _size);
-    assert(snpe_buffer->setBufferAddress(_buffer) == true);
-  }
-};
-
-class SNPEModel : public RunModel {
-public:
-  SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
-  void addInput(const std::string name, float *buffer, int size);
-  void execute();
-
-private:
-  std::string model_data;
-
-#ifdef QCOM2
-  zdl::DlSystem::Runtime_t snpe_runtime;
-#endif
-
-  // snpe model stuff
-  std::unique_ptr<zdl::SNPE::SNPE> snpe;
-  zdl::DlSystem::UserBufferMap input_map;
-  zdl::DlSystem::UserBufferMap output_map;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> output_buffer;
-
-  bool use_tf8;
-  float *output;
-  size_t output_size;
-};
@@ -1,9 +0,0 @@
-# distutils: language = c++
-
-from libcpp.string cimport string
-
-from msgq.visionipc.visionipc cimport cl_context
-
-cdef extern from "selfdrive/modeld/runners/snpemodel.h":
-  cdef cppclass SNPEModel:
-    SNPEModel(string, float*, size_t, int, bool, cl_context)
@@ -1,17 +0,0 @@
-# distutils: language = c++
-# cython: c_string_encoding=ascii, language_level=3
-
-import os
-from libcpp cimport bool
-from libcpp.string cimport string
-
-from .snpemodel cimport SNPEModel as cppSNPEModel
-from selfdrive.modeld.models.commonmodel_pyx cimport CLContext
-from selfdrive.modeld.runners.runmodel_pyx cimport RunModel
-from selfdrive.modeld.runners.runmodel cimport RunModel as cppRunModel
-
-os.environ['ADSP_LIBRARY_PATH'] = "/data/pythonpath/third_party/snpe/dsp/"
-
-cdef class SNPEModel(RunModel):
-  def __cinit__(self, string path, float[:] output, int runtime, bool use_tf8, CLContext context):
-    self.model = <cppRunModel *> new cppSNPEModel(path, &output[0], len(output), runtime, use_tf8, context.context)
@@ -1,58 +0,0 @@
-#include "selfdrive/modeld/runners/thneedmodel.h"
-
-#include <string>
-
-#include "common/swaglog.h"
-
-ThneedModel::ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool luse_tf8, cl_context context) {
-  thneed = new Thneed(true, context);
-  thneed->load(path.c_str());
-  thneed->clexec();
-
-  recorded = false;
-  output = _output;
-}
-
-void* ThneedModel::getCLBuffer(const std::string name) {
-  int index = -1;
-  for (int i = 0; i < inputs.size(); i++) {
-    if (name == inputs[i]->name) {
-      index = i;
-      break;
-    }
-  }
-
-  if (index == -1) {
-    LOGE("Tried to get CL buffer for input `%s` but no input with this name exists", name.c_str());
-    assert(false);
-  }
-
-  if (thneed->input_clmem.size() >= inputs.size()) {
-    return &thneed->input_clmem[inputs.size() - index - 1];
-  } else {
-    return nullptr;
-  }
-}
-
-void ThneedModel::execute() {
-  if (!recorded) {
-    thneed->record = true;
-    float *input_buffers[inputs.size()];
-    for (int i = 0; i < inputs.size(); i++) {
-      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
-    }
-
-    thneed->copy_inputs(input_buffers);
-    thneed->clexec();
-    thneed->copy_output(output);
-    thneed->stop();
-
-    recorded = true;
-  } else {
-    float *input_buffers[inputs.size()];
-    for (int i = 0; i < inputs.size(); i++) {
-      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
-    }
-    thneed->execute(input_buffers, output);
-  }
-}
@@ -1,17 +0,0 @@
-#pragma once
-
-#include <string>
-
-#include "selfdrive/modeld/runners/runmodel.h"
-#include "selfdrive/modeld/thneed/thneed.h"
-
-class ThneedModel : public RunModel {
-public:
-  ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
-  void *getCLBuffer(const std::string name);
-  void execute();
-private:
-  Thneed *thneed = NULL;
-  bool recorded;
-  float *output;
-};
@@ -1,9 +0,0 @@
-# distutils: language = c++
-
-from libcpp.string cimport string
-
-from msgq.visionipc.visionipc cimport cl_context
-
-cdef extern from "selfdrive/modeld/runners/thneedmodel.h":
-  cdef cppclass ThneedModel:
-    ThneedModel(string, float*, size_t, int, bool, cl_context)
@@ -1,14 +0,0 @@
-# distutils: language = c++
-# cython: c_string_encoding=ascii, language_level=3
-
-from libcpp cimport bool
-from libcpp.string cimport string
-
-from .thneedmodel cimport ThneedModel as cppThneedModel
-from selfdrive.modeld.models.commonmodel_pyx cimport CLContext
-from selfdrive.modeld.runners.runmodel_pyx cimport RunModel
-from selfdrive.modeld.runners.runmodel cimport RunModel as cppRunModel
-
-cdef class ThneedModel(RunModel):
-  def __cinit__(self, string path, float[:] output, int runtime, bool use_tf8, CLContext context):
-    self.model = <cppRunModel *> new cppThneedModel(path, &output[0], len(output), runtime, use_tf8, context.context)
@@ -0,0 +1,8 @@
+
+from tinygrad.tensor import Tensor
+from tinygrad.helpers import to_mv
+
+def qcom_tensor_from_opencl_address(opencl_address, shape, dtype):
+  cl_buf_desc_ptr = to_mv(opencl_address, 8).cast('Q')[0]
+  rawbuf_ptr = to_mv(cl_buf_desc_ptr, 0x100).cast('Q')[20] # offset 0xA0 is a raw gpu pointer.
+  return Tensor.from_blob(rawbuf_ptr, shape, dtype=dtype, device='QCOM')
@@ -1,8 +0,0 @@
-thneed is an SNPE accelerator. I know SNPE is already an accelerator, but sometimes things need to go even faster..
-
-It runs on the local device, and caches a single model run. Then it replays it, but fast.
-
-thneed slices through abstraction layers like a fish.
-
-You need a thneed.
-
@@ -1,154 +0,0 @@
-#include <cassert>
-#include <set>
-
-#include "third_party/json11/json11.hpp"
-#include "common/util.h"
-#include "common/clutil.h"
-#include "common/swaglog.h"
-#include "selfdrive/modeld/thneed/thneed.h"
-using namespace json11;
-
-extern map<cl_program, string> g_program_source;
-
-void Thneed::load(const char *filename) {
-  LOGD("Thneed::load: loading from %s\n", filename);
-
-  string buf = util::read_file(filename);
-  int jsz = *(int *)buf.data();
-  string jsonerr;
-  string jj(buf.data() + sizeof(int), jsz);
-  Json jdat = Json::parse(jj, jsonerr);
-
-  map<cl_mem, cl_mem> real_mem;
-  real_mem[NULL] = NULL;
-
-  int ptr = sizeof(int)+jsz;
-  for (auto &obj : jdat["objects"].array_items()) {
-    auto mobj = obj.object_items();
-    int sz = mobj["size"].int_value();
-    cl_mem clbuf = NULL;
-
-    if (mobj["buffer_id"].string_value().size() > 0) {
-      // image buffer must already be allocated
-      clbuf = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
-      assert(mobj["needs_load"].bool_value() == false);
-    } else {
-      if (mobj["needs_load"].bool_value()) {
-        clbuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sz, &buf[ptr], NULL);
-        if (debug >= 1) printf("loading %p %d @ 0x%X\n", clbuf, sz, ptr);
-        ptr += sz;
-      } else {
-        // TODO: is there a faster way to init zeroed out buffers?
-        void *host_zeros = calloc(sz, 1);
-        clbuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sz, host_zeros, NULL);
-        free(host_zeros);
-      }
-    }
-    assert(clbuf != NULL);
-
-    if (mobj["arg_type"] == "image2d_t" || mobj["arg_type"] == "image1d_t") {
-      cl_image_desc desc = {0};
-      desc.image_type = (mobj["arg_type"] == "image2d_t") ? CL_MEM_OBJECT_IMAGE2D : CL_MEM_OBJECT_IMAGE1D_BUFFER;
-      desc.image_width = mobj["width"].int_value();
-      desc.image_height = mobj["height"].int_value();
-      desc.image_row_pitch = mobj["row_pitch"].int_value();
-      assert(sz == desc.image_height*desc.image_row_pitch);
-#ifdef QCOM2
-      desc.buffer = clbuf;
-#else
-      // TODO: we are creating unused buffers on PC
-      clReleaseMemObject(clbuf);
-#endif
-      cl_image_format format = {0};
-      format.image_channel_order = CL_RGBA;
-      format.image_channel_data_type = mobj["float32"].bool_value() ? CL_FLOAT : CL_HALF_FLOAT;
-
-      cl_int errcode;
-
-#ifndef QCOM2
-      if (mobj["needs_load"].bool_value()) {
-        clbuf = clCreateImage(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, &format, &desc, &buf[ptr-sz], &errcode);
-      } else {
-        clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &errcode);
-      }
-#else
-      clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &errcode);
-#endif
-      if (clbuf == NULL) {
-        LOGE("clError: %s create image %zux%zu rp %zu with buffer %p\n", cl_get_error_string(errcode),
-             desc.image_width, desc.image_height, desc.image_row_pitch, desc.buffer);
-      }
-      assert(clbuf != NULL);
-    }
-
-    real_mem[*(cl_mem*)(mobj["id"].string_value().data())] = clbuf;
-  }
-
-  map<string, cl_program> g_programs;
-  for (const auto &[name, source] : jdat["programs"].object_items()) {
-    if (debug >= 1) printf("building %s with size %zu\n", name.c_str(), source.string_value().size());
-    g_programs[name] = cl_program_from_source(context, device_id, source.string_value());
-  }
-
-  for (auto &obj : jdat["inputs"].array_items()) {
-    auto mobj = obj.object_items();
-    int sz = mobj["size"].int_value();
-    cl_mem aa = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
-    input_clmem.push_back(aa);
-    input_sizes.push_back(sz);
-    LOGD("Thneed::load: adding input %s with size %d\n", mobj["name"].string_value().data(), sz);
-
-    cl_int cl_err;
-    void *ret = clEnqueueMapBuffer(command_queue, aa, CL_TRUE, CL_MAP_WRITE, 0, sz, 0, NULL, NULL, &cl_err);
-    if (cl_err != CL_SUCCESS) LOGE("clError: %s map %p %d\n", cl_get_error_string(cl_err), aa, sz);
-    assert(cl_err == CL_SUCCESS);
-    inputs.push_back(ret);
-  }
-
-  for (auto &obj : jdat["outputs"].array_items()) {
-    auto mobj = obj.object_items();
-    int sz = mobj["size"].int_value();
-    LOGD("Thneed::save: adding output with size %d\n", sz);
-    // TODO: support multiple outputs
-    output = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
-    assert(output != NULL);
-  }
-
-  for (auto &obj : jdat["binaries"].array_items()) {
-    string name = obj["name"].string_value();
-    size_t length = obj["length"].int_value();
-    if (debug >= 1) printf("binary %s with size %zu\n", name.c_str(), length);
-    g_programs[name] = cl_program_from_binary(context, device_id, (const uint8_t*)&buf[ptr], length);
-    ptr += length;
-  }
-
-  for (auto &obj : jdat["kernels"].array_items()) {
-    auto gws = obj["global_work_size"];
-    auto lws = obj["local_work_size"];
-    auto kk = shared_ptr<CLQueuedKernel>(new CLQueuedKernel(this));
-
-    kk->name = obj["name"].string_value();
-    kk->program = g_programs[kk->name];
-    kk->work_dim = obj["work_dim"].int_value();
-    for (int i = 0; i < kk->work_dim; i++) {
-      kk->global_work_size[i] = gws[i].int_value();
-      kk->local_work_size[i] = lws[i].int_value();
-    }
-    kk->num_args = obj["num_args"].int_value();
-    for (int i = 0; i < kk->num_args; i++) {
-      string arg = obj["args"].array_items()[i].string_value();
-      int arg_size = obj["args_size"].array_items()[i].int_value();
-      kk->args_size.push_back(arg_size);
-      if (arg_size == 8) {
-        cl_mem val = *(cl_mem*)(arg.data());
-        val = real_mem[val];
-        kk->args.push_back(string((char*)&val, sizeof(val)));
-      } else {
-        kk->args.push_back(arg);
-      }
-    }
-    kq.push_back(kk);
-  }
-
-  clFinish(command_queue);
-}
@@ -1,133 +0,0 @@
-#pragma once
-
-#ifndef __user
-#define __user __attribute__(())
-#endif
-
-#include <cstdint>
-#include <cstdlib>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include <CL/cl.h>
-
-#include "third_party/linux/include/msm_kgsl.h"
-
-using namespace std;
-
-cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value);
-
-namespace json11 {
-  class Json;
-}
-class Thneed;
-
-class GPUMalloc {
-  public:
-    GPUMalloc(int size, int fd);
-    ~GPUMalloc();
-    void *alloc(int size);
-  private:
-    uint64_t base;
-    int remaining;
-};
-
-class CLQueuedKernel {
-  public:
-    CLQueuedKernel(Thneed *lthneed) { thneed = lthneed; }
-    CLQueuedKernel(Thneed *lthneed,
-                   cl_kernel _kernel,
-                   cl_uint _work_dim,
-                   const size_t *_global_work_size,
-                   const size_t *_local_work_size);
-    cl_int exec();
-    void debug_print(bool verbose);
-    int get_arg_num(const char *search_arg_name);
-    cl_program program;
-    string name;
-    cl_uint num_args;
-    vector<string> arg_names;
-    vector<string> arg_types;
-    vector<string> args;
-    vector<int> args_size;
-    cl_kernel kernel = NULL;
-    json11::Json to_json() const;
-
-    cl_uint work_dim;
-    size_t global_work_size[3] = {0};
-    size_t local_work_size[3] = {0};
-  private:
-    Thneed *thneed;
-};
-
-class CachedIoctl {
-  public:
-    virtual void exec() {}
-};
-
-class CachedSync: public CachedIoctl {
-  public:
-    CachedSync(Thneed *lthneed, string ldata) { thneed = lthneed; data = ldata; }
-    void exec();
-  private:
-    Thneed *thneed;
-    string data;
-};
-
-class CachedCommand: public CachedIoctl {
-  public:
-    CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd);
-    void exec();
-  private:
-    void disassemble(int cmd_index);
-    struct kgsl_gpu_command cache;
-    unique_ptr<kgsl_command_object[]> cmds;
-    unique_ptr<kgsl_command_object[]> objs;
-    Thneed *thneed;
-    vector<shared_ptr<CLQueuedKernel> > kq;
-};
-
-class Thneed {
-  public:
-    Thneed(bool do_clinit=false, cl_context _context = NULL);
-    void stop();
-    void execute(float **finputs, float *foutput, bool slow=false);
-    void wait();
-
-    vector<cl_mem> input_clmem;
-    vector<void *> inputs;
-    vector<size_t> input_sizes;
-    cl_mem output = NULL;
-
-    cl_context context = NULL;
-    cl_command_queue command_queue;
-    cl_device_id device_id;
-    int context_id;
-
-    // protected?
-    bool record = false;
-    int debug;
-    int timestamp;
-
-#ifdef QCOM2
-    unique_ptr<GPUMalloc> ram;
-    vector<unique_ptr<CachedIoctl> > cmds;
-    int fd;
-#endif
-
-    // all CL kernels
-    void copy_inputs(float **finputs, bool internal=false);
-    void copy_output(float *foutput);
-    cl_int clexec();
-    vector<shared_ptr<CLQueuedKernel> > kq;
-
-    // pending CL kernels
-    vector<shared_ptr<CLQueuedKernel> > ckq;
-
-    // loading
-    void load(const char *filename);
-  private:
-    void clinit();
-};
-
@@ -1,216 +0,0 @@
-#include "selfdrive/modeld/thneed/thneed.h"
-
-#include <cassert>
-#include <cstring>
-#include <map>
-
-#include "common/clutil.h"
-#include "common/timing.h"
-
-map<pair<cl_kernel, int>, string> g_args;
-map<pair<cl_kernel, int>, int> g_args_size;
-map<cl_program, string> g_program_source;
-
-void Thneed::stop() {
-  //printf("Thneed::stop: recorded %lu commands\n", cmds.size());
-  record = false;
-}
-
-void Thneed::clinit() {
-  device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
-  if (context == NULL) context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, NULL, NULL, &err));
-  //cl_command_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
-  cl_command_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
-  command_queue = CL_CHECK_ERR(clCreateCommandQueueWithProperties(context, device_id, props, &err));
-  printf("Thneed::clinit done\n");
-}
-
-cl_int Thneed::clexec() {
-  if (debug >= 1) printf("Thneed::clexec: running %lu queued kernels\n", kq.size());
-  for (auto &k : kq) {
-    if (record) ckq.push_back(k);
-    cl_int ret = k->exec();
-    assert(ret == CL_SUCCESS);
-  }
-  return clFinish(command_queue);
-}
-
-void Thneed::copy_inputs(float **finputs, bool internal) {
-  for (int idx = 0; idx < inputs.size(); ++idx) {
-    if (debug >= 1) printf("copying %lu -- %p -> %p (cl %p)\n", input_sizes[idx], finputs[idx], inputs[idx], input_clmem[idx]);
-
-    if (internal) {
-      // if it's internal, using memcpy is fine since the buffer sync is cached in the ioctl layer
-      if (finputs[idx] != NULL) memcpy(inputs[idx], finputs[idx], input_sizes[idx]);
-    } else {
-      if (finputs[idx] != NULL) CL_CHECK(clEnqueueWriteBuffer(command_queue, input_clmem[idx], CL_TRUE, 0, input_sizes[idx], finputs[idx], 0, NULL, NULL));
-    }
-  }
-}
-
-void Thneed::copy_output(float *foutput) {
-  if (output != NULL) {
-    size_t sz;
-    clGetMemObjectInfo(output, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
-    if (debug >= 1) printf("copying %lu for output %p -> %p\n", sz, output, foutput);
-    CL_CHECK(clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sz, foutput, 0, NULL, NULL));
-  } else {
-    printf("CAUTION: model output is NULL, does it have no outputs?\n");
-  }
-}
-
-// *********** CLQueuedKernel ***********
-
-CLQueuedKernel::CLQueuedKernel(Thneed *lthneed,
-                               cl_kernel _kernel,
-                               cl_uint _work_dim,
-                               const size_t *_global_work_size,
-                               const size_t *_local_work_size) {
-  thneed = lthneed;
-  kernel = _kernel;
-  work_dim = _work_dim;
-  assert(work_dim <= 3);
-  for (int i = 0; i < work_dim; i++) {
-    global_work_size[i] = _global_work_size[i];
-    local_work_size[i] = _local_work_size[i];
-  }
-
-  char _name[0x100];
-  clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, sizeof(_name), _name, NULL);
-  name = string(_name);
-  clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(num_args), &num_args, NULL);
-
-  // get args
-  for (int i = 0; i < num_args; i++) {
-    char arg_name[0x100] = {0};
-    clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
-    arg_names.push_back(string(arg_name));
-    clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_name), arg_name, NULL);
-    arg_types.push_back(string(arg_name));
-
-    args.push_back(g_args[make_pair(kernel, i)]);
-    args_size.push_back(g_args_size[make_pair(kernel, i)]);
-  }
-
-  // get program
-  clGetKernelInfo(kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, NULL);
-}
-
-int CLQueuedKernel::get_arg_num(const char *search_arg_name) {
-  for (int i = 0; i < num_args; i++) {
-    if (arg_names[i] == search_arg_name) return i;
-  }
-  printf("failed to find %s in %s\n", search_arg_name, name.c_str());
-  assert(false);
-}
-
-cl_int CLQueuedKernel::exec() {
-  if (kernel == NULL) {
-    kernel = clCreateKernel(program, name.c_str(), NULL);
-    arg_names.clear();
-    arg_types.clear();
-
-    for (int j = 0; j < num_args; j++) {
-      char arg_name[0x100] = {0};
-      clGetKernelArgInfo(kernel, j, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
-      arg_names.push_back(string(arg_name));
-      clGetKernelArgInfo(kernel, j, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_name), arg_name, NULL);
-      arg_types.push_back(string(arg_name));
-
-      cl_int ret;
-      if (args[j].size() != 0) {
-        assert(args[j].size() == args_size[j]);
-        ret = thneed_clSetKernelArg(kernel, j, args[j].size(), args[j].data());
-      } else {
-        ret = thneed_clSetKernelArg(kernel, j, args_size[j], NULL);
-      }
-      assert(ret == CL_SUCCESS);
-    }
-  }
-
-  if (thneed->debug >= 1) {
-    debug_print(thneed->debug >= 2);
-  }
-
-  return clEnqueueNDRangeKernel(thneed->command_queue,
-    kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, NULL);
-}
-
-void CLQueuedKernel::debug_print(bool verbose) {
-  printf("%p %56s -- ", kernel, name.c_str());
-  for (int i = 0; i < work_dim; i++) {
-    printf("%4zu ", global_work_size[i]);
-  }
-  printf(" -- ");
-  for (int i = 0; i < work_dim; i++) {
-    printf("%4zu ", local_work_size[i]);
-  }
-  printf("\n");
-
-  if (verbose) {
-    for (int i = 0; i < num_args; i++) {
-      string arg = args[i];
-      printf("  %s %s", arg_types[i].c_str(), arg_names[i].c_str());
-      void *arg_value = (void*)arg.data();
-      int arg_size = arg.size();
-      if (arg_size == 0) {
-        printf(" (size) %d", args_size[i]);
-      } else if (arg_size == 1) {
-        printf(" = %d", *((char*)arg_value));
-      } else if (arg_size == 2) {
-        printf(" = %d", *((short*)arg_value));
-      } else if (arg_size == 4) {
-        if (arg_types[i] == "float") {
-          printf(" = %f", *((float*)arg_value));
-        } else {
-          printf(" = %d", *((int*)arg_value));
-        }
-      } else if (arg_size == 8) {
-        cl_mem val = (cl_mem)(*((uintptr_t*)arg_value));
-        printf(" = %p", val);
-        if (val != NULL) {
-          cl_mem_object_type obj_type;
-          clGetMemObjectInfo(val, CL_MEM_TYPE, sizeof(obj_type), &obj_type, NULL);
-          if (arg_types[i] == "image2d_t" || arg_types[i] == "image1d_t" || obj_type == CL_MEM_OBJECT_IMAGE2D) {
-            cl_image_format format;
-            size_t width, height, depth, array_size, row_pitch, slice_pitch;
-            cl_mem buf;
-            clGetImageInfo(val, CL_IMAGE_FORMAT, sizeof(format), &format, NULL);
-            assert(format.image_channel_order == CL_RGBA);
-            assert(format.image_channel_data_type == CL_HALF_FLOAT || format.image_channel_data_type == CL_FLOAT);
-            clGetImageInfo(val, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
-            clGetImageInfo(val, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
-            clGetImageInfo(val, CL_IMAGE_ROW_PITCH, sizeof(row_pitch), &row_pitch, NULL);
-            clGetImageInfo(val, CL_IMAGE_DEPTH, sizeof(depth), &depth, NULL);
-            clGetImageInfo(val, CL_IMAGE_ARRAY_SIZE, sizeof(array_size), &array_size, NULL);
-            clGetImageInfo(val, CL_IMAGE_SLICE_PITCH, sizeof(slice_pitch), &slice_pitch, NULL);
-            assert(depth == 0);
-            assert(array_size == 0);
-            assert(slice_pitch == 0);
-
-            clGetImageInfo(val, CL_IMAGE_BUFFER, sizeof(buf), &buf, NULL);
-            size_t sz = 0;
-            if (buf != NULL) clGetMemObjectInfo(buf, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
-            printf(" image %zu x %zu rp %zu @ %p buffer %zu", width, height, row_pitch, buf, sz);
-          } else {
-            size_t sz;
-            clGetMemObjectInfo(val, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
-            printf(" buffer %zu", sz);
-          }
-        }
-      }
-      printf("\n");
-    }
-  }
-}
-
-cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) {
-  g_args_size[make_pair(kernel, arg_index)] = arg_size;
-  if (arg_value != NULL) {
-    g_args[make_pair(kernel, arg_index)] = string((char*)arg_value, arg_size);
-  } else {
-    g_args[make_pair(kernel, arg_index)] = string("");
-  }
-  cl_int ret = clSetKernelArg(kernel, arg_index, arg_size, arg_value);
-  return ret;
-}
@@ -1,32 +0,0 @@
-#include "selfdrive/modeld/thneed/thneed.h"
-
-#include <cassert>
-
-#include "common/clutil.h"
-#include "common/timing.h"
-
-Thneed::Thneed(bool do_clinit, cl_context _context) {
-  context = _context;
-  if (do_clinit) clinit();
-  char *thneed_debug_env = getenv("THNEED_DEBUG");
-  debug = (thneed_debug_env != NULL) ? atoi(thneed_debug_env) : 0;
-}
-
-void Thneed::execute(float **finputs, float *foutput, bool slow) {
-  uint64_t tb, te;
-  if (debug >= 1) tb = nanos_since_boot();
-
-  // ****** copy inputs
-  copy_inputs(finputs);
-
-  // ****** run commands
-  clexec();
-
-  // ****** copy outputs
-  copy_output(foutput);
-
-  if (debug >= 1) {
-    te = nanos_since_boot();
-    printf("model exec in %lu us\n", (te-tb)/1000);
-  }
-}
@@ -1,258 +0,0 @@
-#include "selfdrive/modeld/thneed/thneed.h"
-
-#include <dlfcn.h>
-#include <sys/mman.h>
-
-#include <cassert>
-#include <cerrno>
-#include <cstring>
-#include <map>
-#include <string>
-
-#include "common/clutil.h"
-#include "common/timing.h"
-
-Thneed *g_thneed = NULL;
-int g_fd = -1;
-
-void hexdump(uint8_t *d, int len) {
-  assert((len%4) == 0);
-  printf("  dumping %p len 0x%x\n", d, len);
-  for (int i = 0; i < len/4; i++) {
-    if (i != 0 && (i%0x10) == 0) printf("\n");
-    printf("%8x ", d[i]);
-  }
-  printf("\n");
-}
-
-// *********** ioctl interceptor ***********
-
-extern "C" {
-
-int (*my_ioctl)(int filedes, unsigned long request, void *argp) = NULL;
-#undef ioctl
-int ioctl(int filedes, unsigned long request, void *argp) {
-  request &= 0xFFFFFFFF;  // needed on QCOM2
-  if (my_ioctl == NULL) my_ioctl = reinterpret_cast<decltype(my_ioctl)>(dlsym(RTLD_NEXT, "ioctl"));
-  Thneed *thneed = g_thneed;
-
-  // save the fd
-  if (request == IOCTL_KGSL_GPUOBJ_ALLOC) g_fd = filedes;
-
-  // note that this runs always, even without a thneed object
-  if (request == IOCTL_KGSL_DRAWCTXT_CREATE) {
-    struct kgsl_drawctxt_create *create = (struct kgsl_drawctxt_create *)argp;
-    create->flags &= ~KGSL_CONTEXT_PRIORITY_MASK;
-    create->flags |= 6 << KGSL_CONTEXT_PRIORITY_SHIFT;   // priority from 1-15, 1 is max priority
-    printf("IOCTL_KGSL_DRAWCTXT_CREATE: creating context with flags 0x%x\n", create->flags);
-  }
-
-  if (thneed != NULL) {
-    if (request == IOCTL_KGSL_GPU_COMMAND) {
-      struct kgsl_gpu_command *cmd = (struct kgsl_gpu_command *)argp;
-      if (thneed->record) {
-        thneed->timestamp = cmd->timestamp;
-        thneed->context_id = cmd->context_id;
-        thneed->cmds.push_back(unique_ptr<CachedCommand>(new CachedCommand(thneed, cmd)));
-      }
-      if (thneed->debug >= 1) {
-        printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx    context_id: %u  timestamp: %u  numcmds: %d  numobjs: %d\n",
-            thneed->cmds.size(),
-            cmd->flags,
-            cmd->context_id, cmd->timestamp, cmd->numcmds, cmd->numobjs);
-      }
-    } else if (request == IOCTL_KGSL_GPUOBJ_SYNC) {
-      struct kgsl_gpuobj_sync *cmd = (struct kgsl_gpuobj_sync *)argp;
-      struct kgsl_gpuobj_sync_obj *objs = (struct kgsl_gpuobj_sync_obj *)(cmd->objs);
-
-      if (thneed->debug >= 2) {
-        printf("IOCTL_KGSL_GPUOBJ_SYNC count:%d ", cmd->count);
-        for (int i = 0; i < cmd->count; i++) {
-          printf(" -- offset:0x%lx len:0x%lx id:%d op:%d  ", objs[i].offset, objs[i].length, objs[i].id, objs[i].op);
-        }
-        printf("\n");
-      }
-
-      if (thneed->record) {
-        thneed->cmds.push_back(unique_ptr<CachedSync>(new
-              CachedSync(thneed, string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count))));
-      }
-    } else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) {
-      struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp;
-      if (thneed->debug >= 1) {
-        printf("IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID: context_id: %d  timestamp: %d  timeout: %d\n",
-            cmd->context_id, cmd->timestamp, cmd->timeout);
-      }
-    } else if (request == IOCTL_KGSL_SETPROPERTY) {
-      if (thneed->debug >= 1) {
-        struct kgsl_device_getproperty *prop = (struct kgsl_device_getproperty *)argp;
-        printf("IOCTL_KGSL_SETPROPERTY: 0x%x sizebytes:%zu\n", prop->type, prop->sizebytes);
-        if (thneed->debug >= 2) {
-          hexdump((uint8_t *)prop->value, prop->sizebytes);
-          if (prop->type == KGSL_PROP_PWR_CONSTRAINT) {
-            struct kgsl_device_constraint *constraint = (struct kgsl_device_constraint *)prop->value;
-            hexdump((uint8_t *)constraint->data, constraint->size);
-          }
-        }
-      }
-    } else if (request == IOCTL_KGSL_DRAWCTXT_CREATE || request == IOCTL_KGSL_DRAWCTXT_DESTROY) {
-      // this happens
-    } else if (request == IOCTL_KGSL_GPUOBJ_ALLOC || request == IOCTL_KGSL_GPUOBJ_FREE) {
-      // this happens
-    } else {
-      if (thneed->debug >= 1) {
-        printf("other ioctl %lx\n", request);
-      }
-    }
-  }
-
-  int ret = my_ioctl(filedes, request, argp);
-  // NOTE: This error message goes into stdout and messes up pyenv
-  // if (ret != 0) printf("ioctl returned %d with errno %d\n", ret, errno);
-  return ret;
-}
-
-}
-
-// *********** GPUMalloc ***********
-
-GPUMalloc::GPUMalloc(int size, int fd) {
-  struct kgsl_gpuobj_alloc alloc;
-  memset(&alloc, 0, sizeof(alloc));
-  alloc.size = size;
-  alloc.flags = 0x10000a00;
-  ioctl(fd, IOCTL_KGSL_GPUOBJ_ALLOC, &alloc);
-  void *addr = mmap64(NULL, alloc.mmapsize, 0x3, 0x1, fd, alloc.id*0x1000);
-  assert(addr != MAP_FAILED);
-
-  base = (uint64_t)addr;
-  remaining = size;
-}
-
-GPUMalloc::~GPUMalloc() {
-  // TODO: free the GPU malloced area
-}
-
-void *GPUMalloc::alloc(int size) {
-  void *ret = (void*)base;
-  size = (size+0xff) & (~0xFF);
-  assert(size <= remaining);
-  remaining -= size;
-  base += size;
-  return ret;
-}
-
-// *********** CachedSync, at the ioctl layer ***********
-
-void CachedSync::exec() {
-  struct kgsl_gpuobj_sync cmd;
-
-  cmd.objs = (uint64_t)data.data();
-  cmd.obj_len = data.length();
-  cmd.count = data.length() / sizeof(struct kgsl_gpuobj_sync_obj);
-
-  int ret = ioctl(thneed->fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd);
-  assert(ret == 0);
-}
-
-// *********** CachedCommand, at the ioctl layer ***********
-
-CachedCommand::CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd) {
-  thneed = lthneed;
-  assert(cmd->numsyncs == 0);
-
-  memcpy(&cache, cmd, sizeof(cache));
-
-  if (cmd->numcmds > 0) {
-    cmds = make_unique<struct kgsl_command_object[]>(cmd->numcmds);
-    memcpy(cmds.get(), (void *)cmd->cmdlist, sizeof(struct kgsl_command_object)*cmd->numcmds);
-    cache.cmdlist = (uint64_t)cmds.get();
-    for (int i = 0; i < cmd->numcmds; i++) {
-      void *nn = thneed->ram->alloc(cmds[i].size);
-      memcpy(nn, (void*)cmds[i].gpuaddr, cmds[i].size);
-      cmds[i].gpuaddr = (uint64_t)nn;
-    }
-  }
-
-  if (cmd->numobjs > 0) {
-    objs = make_unique<struct kgsl_command_object[]>(cmd->numobjs);
-    memcpy(objs.get(), (void *)cmd->objlist, sizeof(struct kgsl_command_object)*cmd->numobjs);
-    cache.objlist = (uint64_t)objs.get();
-    for (int i = 0; i < cmd->numobjs; i++) {
-      void *nn = thneed->ram->alloc(objs[i].size);
-      memset(nn, 0, objs[i].size);
-      objs[i].gpuaddr = (uint64_t)nn;
-    }
-  }
-
-  kq = thneed->ckq;
-  thneed->ckq.clear();
-}
-
-void CachedCommand::exec() {
-  cache.timestamp = ++thneed->timestamp;
-  int ret = ioctl(thneed->fd, IOCTL_KGSL_GPU_COMMAND, &cache);
-
-  if (thneed->debug >= 1) printf("CachedCommand::exec got %d\n", ret);
-
-  if (thneed->debug >= 2) {
-    for (auto &it : kq) {
-      it->debug_print(false);
-    }
-  }
-
-  assert(ret == 0);
-}
-
-// *********** Thneed ***********
-
-Thneed::Thneed(bool do_clinit, cl_context _context) {
-  // TODO: QCOM2 actually requires a different context
-  //context = _context;
-  if (do_clinit) clinit();
-  assert(g_fd != -1);
-  fd = g_fd;
-  ram = make_unique<GPUMalloc>(0x80000, fd);
-  timestamp = -1;
-  g_thneed = this;
-  char *thneed_debug_env = getenv("THNEED_DEBUG");
-  debug = (thneed_debug_env != NULL) ? atoi(thneed_debug_env) : 0;
-}
-
-void Thneed::wait() {
-  struct kgsl_device_waittimestamp_ctxtid wait;
-  wait.context_id = context_id;
-  wait.timestamp = timestamp;
-  wait.timeout = -1;
-
-  uint64_t tb = nanos_since_boot();
-  int wret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
-  uint64_t te = nanos_since_boot();
-
-  if (debug >= 1) printf("wait %d after %lu us\n", wret, (te-tb)/1000);
-}
-
-void Thneed::execute(float **finputs, float *foutput, bool slow) {
-  uint64_t tb, te;
-  if (debug >= 1) tb = nanos_since_boot();
-
-  // ****** copy inputs
-  copy_inputs(finputs, true);
-
-  // ****** run commands
-  int i = 0;
-  for (auto &it : cmds) {
-    ++i;
-    if (debug >= 1) printf("run %2d @ %7lu us: ", i, (nanos_since_boot()-tb)/1000);
-    it->exec();
-    if ((i == cmds.size()) || slow) wait();
-  }
-
-  // ****** copy outputs
-  copy_output(foutput);
-
-  if (debug >= 1) {
-    te = nanos_since_boot();
-    printf("model exec in %lu us\n", (te-tb)/1000);
-  }
-}
@@ -137,8 +137,8 @@ void Panda::enable_deepsleep() {
  handle->control_write(0xfb, 0, 0);
 }

-void Panda::send_heartbeat(bool engaged, bool engaged_mads) {
-  handle->control_write(0xf3, engaged, engaged_mads);
+void Panda::send_heartbeat(bool engaged) {
+  handle->control_write(0xf3, engaged, 0);
 }

 void Panda::set_can_speed_kbps(uint16_t bus, uint16_t speed) {
@@ -75,7 +75,7 @@ public:
  std::optional<std::string> get_serial();
  void set_power_saving(bool power_saving);
  void enable_deepsleep();
-  void send_heartbeat(bool engaged, bool engaged_mads);
+  void send_heartbeat(bool engaged);
  void set_can_speed_kbps(uint16_t bus, uint16_t speed);
  void set_can_fd_auto(uint16_t bus, bool enabled);
  void set_data_speed_kbps(uint16_t bus, uint16_t speed);
@@ -41,8 +41,6 @@
 #define CUTOFF_IL 400
 #define SATURATE_IL 1000

-#define ALT_EXP_DISENGAGE_LATERAL_ON_BRAKE 2048
-
 ExitHandler do_exit;

 bool check_all_connected(const std::vector<Panda *> &pandas) {
@@ -55,18 +53,6 @@ bool check_all_connected(const std::vector<Panda *> &pandas) {
  return true;
 }

-bool process_mads_heartbeat(SubMaster *sm) {
-  const int &alt_exp = (*sm)["carParams"].getCarParams().getAlternativeExperience();
-  const bool disengage_lateral_on_brake = (alt_exp & ALT_EXP_DISENGAGE_LATERAL_ON_BRAKE) != 0;
-
-  const auto &mads = (*sm)["selfdriveStateSP"].getSelfdriveStateSP().getMads();
-  const bool heartbeat_type = disengage_lateral_on_brake ? mads.getActive() : mads.getEnabled();
-
-  const bool engaged = sm->allAliveAndValid({"selfdriveStateSP"}) && heartbeat_type;
-
-  return engaged;
-}
-
 Panda *connect(std::string serial="", uint32_t index=0) {
  std::unique_ptr<Panda> panda;
  try {
@@ -158,7 +144,6 @@ void fill_panda_state(cereal::PandaState::Builder &ps, cereal::PandaState::Panda
  ps.setIgnitionLine(health.ignition_line_pkt);
  ps.setIgnitionCan(health.ignition_can_pkt);
  ps.setControlsAllowed(health.controls_allowed_pkt);
-  ps.setControlsAllowedLat(health.controls_allowed_lat_pkt);
  ps.setTxBufferOverflow(health.tx_buffer_overflow_pkt);
  ps.setRxBufferOverflow(health.rx_buffer_overflow_pkt);
  ps.setPandaType(hw_type);
@@ -342,7 +327,7 @@ void send_peripheral_state(Panda *panda, PubMaster *pm) {
 }

 void process_panda_state(std::vector<Panda *> &pandas, PubMaster *pm, bool spoofing_started) {
-  static SubMaster sm({"selfdriveState", "selfdriveStateSP", "carParams"});
+  static SubMaster sm({"selfdriveState"});

  std::vector<std::string> connected_serials;
  for (Panda *p : pandas) {
@@ -381,9 +366,8 @@ void process_panda_state(std::vector<Panda *> &pandas, PubMaster *pm, bool spoof

    sm.update(0);
    const bool engaged = sm.allAliveAndValid({"selfdriveState"}) && sm["selfdriveState"].getSelfdriveState().getEnabled();
-    const bool engaged_mads = process_mads_heartbeat(&sm);
    for (const auto &panda : pandas) {
-      panda->send_heartbeat(engaged, engaged_mads);
+      panda->send_heartbeat(engaged);
    }
  }
 }
@@ -105,24 +105,6 @@ class Events:
      ret.append(event)
    return ret

-  def has(self, event_name: int) -> bool:
-    return event_name in self.events
-
-  def contains_in_list(self, events_list: list[int]) -> bool:
-    return any(event_name in self.events for event_name in events_list)
-
-  def remove(self, event_name: int, static: bool = False) -> None:
-    if static and event_name in self.static_events:
-      self.static_events.remove(event_name)
-
-    if event_name in self.events:
-      self.event_counters[event_name] = self.event_counters[event_name] + 1
-      self.events.remove(event_name)
-
-  def replace(self, prev_event_name: int, cur_event_name: int, static: bool = False) -> None:
-    self.remove(prev_event_name, static)
-    self.add(cur_event_name, static)
-

 class Alert:
  def __init__(self,
@@ -969,99 +951,6 @@ EVENTS: dict[int, dict[str, Alert | AlertCallbackType]] = {
    ET.WARNING: personality_changed_alert,
  },

-  # sunnypilot
-  EventName.lkasEnable: {
-    ET.ENABLE: EngagementAlert(AudibleAlert.engage),
-  },
-
-  EventName.lkasDisable: {
-    ET.USER_DISABLE: EngagementAlert(AudibleAlert.disengage),
-  },
-
-  EventName.manualSteeringRequired: {
-    ET.USER_DISABLE: Alert(
-      "Automatic Lane Centering is OFF",
-      "Manual Steering Required",
-      AlertStatus.normal, AlertSize.mid,
-      Priority.LOW, VisualAlert.none, AudibleAlert.disengage, 1.),
-  },
-
-  EventName.manualLongitudinalRequired: {
-    ET.WARNING: Alert(
-      "Smart/Adaptive Cruise Control: OFF",
-      "Manual Speed Control Required",
-      AlertStatus.normal, AlertSize.mid,
-      Priority.LOW, VisualAlert.none, AudibleAlert.none, 1.),
-  },
-
-  EventName.silentLkasEnable: {
-    ET.ENABLE: EngagementAlert(AudibleAlert.none),
-  },
-
-  EventName.silentLkasDisable: {
-    ET.USER_DISABLE: EngagementAlert(AudibleAlert.none),
-  },
-
-  EventName.silentBrakeHold: {
-    ET.USER_DISABLE: EngagementAlert(AudibleAlert.none),
-    ET.NO_ENTRY: NoEntryAlert("Brake Hold Active"),
-  },
-
-  EventName.silentWrongGear: {
-    ET.WARNING: Alert(
-      "",
-      "",
-      AlertStatus.normal, AlertSize.none,
-      Priority.LOWEST, VisualAlert.none, AudibleAlert.none, 0.),
-    ET.NO_ENTRY: Alert(
-      "Gear not D",
-      "openpilot Unavailable",
-      AlertStatus.normal, AlertSize.mid,
-      Priority.LOW, VisualAlert.none, AudibleAlert.none, 0.),
-  },
-
-  EventName.silentReverseGear: {
-    ET.PERMANENT: Alert(
-      "Reverse\nGear",
-      "",
-      AlertStatus.normal, AlertSize.full,
-      Priority.LOWEST, VisualAlert.none, AudibleAlert.none, .2, creation_delay=0.5),
-    ET.NO_ENTRY: NoEntryAlert("Reverse Gear"),
-  },
-
-  EventName.silentDoorOpen: {
-    ET.WARNING: Alert(
-      "",
-      "",
-      AlertStatus.normal, AlertSize.none,
-      Priority.LOWEST, VisualAlert.none, AudibleAlert.none, 0.),
-    ET.NO_ENTRY: NoEntryAlert("Door Open"),
-  },
-
-  EventName.silentSeatbeltNotLatched: {
-    ET.WARNING: Alert(
-      "",
-      "",
-      AlertStatus.normal, AlertSize.none,
-      Priority.LOWEST, VisualAlert.none, AudibleAlert.none, 0.),
-    ET.NO_ENTRY: NoEntryAlert("Seatbelt Unlatched"),
-  },
-
-  EventName.silentParkBrake: {
-    ET.WARNING: Alert(
-      "",
-      "",
-      AlertStatus.normal, AlertSize.none,
-      Priority.LOWEST, VisualAlert.none, AudibleAlert.none, 0.),
-    ET.NO_ENTRY: NoEntryAlert("Parking Brake Engaged"),
-  },
-
-  EventName.controlsMismatchLateral: {
-    ET.IMMEDIATE_DISABLE: ImmediateDisableAlert("Controls Mismatch: Lateral"),
-    ET.NO_ENTRY: NoEntryAlert("Controls Mismatch: Lateral"),
-  },
-
-
 }


@@ -23,8 +23,6 @@ from openpilot.selfdrive.controls.lib.latcontrol import MIN_LATERAL_CONTROL_SPEE

 from openpilot.system.version import get_build_metadata

-from openpilot.sunnypilot.mads.mads import ModularAssistiveDrivingSystem
-
 REPLAY = "REPLAY" in os.environ
 SIMULATION = "SIMULATION" in os.environ
 TESTING_CLOSET = "TESTING_CLOSET" in os.environ
@@ -133,10 +131,6 @@ class SelfdriveD:
    elif self.CP.passive:
      self.events.add(EventName.dashcamMode, static=True)

-    self.mads = ModularAssistiveDrivingSystem(self)
-    sock_services = list(self.pm.sock.keys()) + ['selfdriveStateSP']
-    self.pm = messaging.PubMaster(sock_services)
-
  def update_events(self, CS):
    """Compute onroadEvents from carState"""

@@ -457,25 +451,11 @@ class SelfdriveD:
      self.pm.send('onroadEvents', ce_send)
    self.events_prev = self.events.names.copy()

-    # selfdriveStateSP
-    ss_sp_msg = messaging.new_message('selfdriveStateSP')
-    ss_sp_msg.valid = True
-    ss_sp = ss_sp_msg.selfdriveStateSP
-    mads = ss_sp.mads
-    mads.state = self.mads.state_machine.state
-    mads.enabled = self.mads.enabled
-    mads.active = self.mads.active
-    mads.available = self.mads.enabled_toggle
-
-    self.pm.send('selfdriveStateSP', ss_sp_msg)
-
  def step(self):
    CS = self.data_sample()
    self.update_events(CS)
    if not self.CP.passive and self.initialized:
      self.enabled, self.active = self.state_machine.update(self.events)
-    if not self.CP.notCar:
-      self.mads.update(CS, self.sm)
    self.update_alerts(CS)

    self.publish_selfdriveState(CS)
@@ -493,8 +473,6 @@ class SelfdriveD:
      self.is_metric = self.params.get_bool("IsMetric")
      self.experimental_mode = self.params.get_bool("ExperimentalMode") and self.CP.openpilotLongitudinalControl
      self.personality = self.read_personality_param()
-
-      self.mads.read_params()
      time.sleep(0.1)

  def run(self):
@@ -41,7 +41,7 @@ class TestAlerts:
    events = log.OnroadEvent.EventName.schema.enumerants

    for name, e in events.items():
-      if not name.endswith("DEPRECATED") and not name.startswith("eventReserved"):
+      if not name.endswith("DEPRECATED"):
        fail_msg = f"{name} @{e} not in EVENTS"
        assert e in EVENTS.keys(), fail_msg

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:737f95d34912db53a303ba6499e6f697b510fa5872b8c71f701a4fe924b5466e
+size 356169
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:79ccc3bd2094ba8a55adedf0007b0152eb3a68edc5e2d35aeccbba122d5811c6
-size 356151
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e359e8f6b5a22b6f3f89b54989dac2110ee3a4463de2d785be83e20cda4f1cb6
+size 254248
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:45f39fe1d1dc8c271f577d1a12812e01da34979bf3fffaad01a19a7a61b6d456
-size 256342
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8e4d044812a714ebdf0b15e73d4466e9ddaafa374368f308803c6b68dcd79ab
+size 332433
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ec8f8d879b38a4c8d4319a3bd67872d5e5d348bc5472443c4c8aa1cb1dd62cf5
-size 332404
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53b80c3c99a6897cafe7d408872c287ab0bbaac2751e344cf4623b312d2e4866
+size 268928
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:68afd54dc68f6abff699d2740f90830b0f492db8818869e8936a63e7bed80458
-size 268827
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4fa8a841c964e90b9c14b5aede8f30de949d319ae072cc291f0afb1c4c24baa
+size 437808
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:32a458c0b364c1c793a4a843fe7a8fd21dd4ad45ff87dc8ad41a8e8759e52c80
-size 437811
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce370994de01d6240fa753846ddc7e1b852acac3f649a4c29ea16acae126f974
+size 308578
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7b3d412d4066f9ac90788e11a20d99ea3ff92eafdc8a317610b7d9c918aedd59
-size 308644
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82333cfc026735eac81defae9e01b82b81ac30fd01ad265dbdd311dd97322198
+size 393106
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8ad0c3881d7d88f9038a1b3a5e43c779e84dca47e2f7f6d45a3449cd8a7dec99
-size 393122
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4ebae1742e3bb7d89f8cd452928c5c6a1b8679155562d23c6303ca4ec2e3b02
+size 334350
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6a0f9dc3743e4e09375f7a3289228b64486915ae4ef6dfcff66572a34820d5d2
-size 334502
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03326fb4486c1ffdd3609a0e0200e65d1f30dca5d9b047501d98b1b2d5321e75
+size 470495
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:944406ae64efb422b568437588fec6a8a8b5b7d9e577dc1d22c83a1d6d3813ff
-size 470417
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bffa062a5eace4d499c2fe12f2b0eb8c71207f4e7bea0e88456614f00e2859b1
+size 258989
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d80c61dc2a0b3685a522013c8aa8347adfdfc6d0611485da88b3a17771c68301
-size 260913
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe97d060fb6e60652b26b26406ca8b03890f6bff4b4bc5cb73fca267068a04c9
+size 217460
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3271e31149f7053d134d3a0217851167e573647a8b415138e8c80d429d0a02c9
-size 217488
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60e33d209f4f600acf344feca8ab6494a1bf3aafe3c7121e2110a82ea06aa61e
+size 293084
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:35c95eaf222affae2e236f0b717c17e7f3b8a30e85d4ebb4ad1506e5025ce0b8
-size 293078
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
DevTekVE	40d9e092b6	Refactor type annotations and return types in model_runner Removed specific type hints and return annotations for `self.inputs` and `run_model` methods to enhance flexibility and maintain consistency. These changes streamline the code and improve compatibility with varying input/output types during model inference.	2024-12-29 18:04:05 +01:00
DevTekVE	6318aa52e3	Refactor type hints for improved code consistency. Replaced specific type hints like `dict[str, np.ndarray]` with generic `dict` in several method signatures to simplify annotations. This improves overall readability and maintains functionality, while aligning with existing code practices.	2024-12-29 17:59:50 +01:00
DevTekVE	7852fa66b1	Clean	2024-12-29 17:51:30 +01:00
DevTekVE	8d9c1e2035	Add type annotations to inputs and prepare_inputs method This commit adds explicit type annotations for the `inputs` dictionary and the return value of the `prepare_inputs` method in `model_runner.py`. These changes improve code readability and ensure type consistency, enhancing maintainability and reducing potential errors.	2024-12-29 17:50:58 +01:00
DevTekVE	c39f722f7b	Rename TinyGradRunner to TinygradRunner for consistency. The class name and references were updated to maintain naming consistency across the codebase. This aligns with naming conventions and improves code clarity. No functional changes were introduced.	2024-12-29 17:48:00 +01:00
DevTekVE	550c08ac4c	Refactor `prepare_inputs` to use explicit `CLMem` type. This update replaces the generic `any` type with the more explicit `CLMem` type for better type safety and clarity. It ensures consistency across the `prepare_inputs` method implementations in derived classes, improving code readability and robustness.	2024-12-29 17:44:23 +01:00
DevTekVE	8c838af5fa	Refactor model inference to use internal state for inputs Simplified the `run_model` method by removing the requirement to pass inputs as arguments, and instead leveraging an internal `inputs` state. Adjusted `prepare_inputs` methods across model runners to populate this internal state. This refactor improves code clarity and reduces redundancy in managing input data.	2024-12-29 17:40:45 +01:00
DevTekVE	2521d60b1b	Fix model_runner output to ensure tensor conversion to NumPy. Updated the `run_model` method to explicitly convert tensor outputs to NumPy arrays using `.numpy()`. This ensures compatibility with downstream processes relying on NumPy array inputs.	2024-12-29 17:31:27 +01:00
DevTekVE	206398d3b5	Remove unused imports and redundant pass statements. This change cleans up the code by removing unused imports and redundant `pass` statements in abstract methods. It improves code readability and adheres to cleaner coding practices.	2024-12-29 17:22:13 +01:00
DevTekVE	a9e99615cd	Remove redundant tensor assignment in model runner. The `assign` operation was unnecessary as new tensors are realized for updated inputs. This simplifies the code and avoids redundant updates, improving clarity and maintainability.	2024-12-29 17:20:15 +01:00
DevTekVE	ec44b78ad8	Refactor model runner initialization logic. Removed the `create_model_runner` factory function and replaced it with direct initialization of `TinyGradRunner` or `ONNXRunner`. Simplified the `__init__` methods by standardizing paths as constants within `model_runner.py` for cleaner and more maintainable code.	2024-12-29 17:14:08 +01:00
DevTekVE	dcd3e09294	Refactor formatting in model_runner.py for readability. Consolidated multiline function declarations and calls into single lines where appropriate to improve code readability and maintainability. No changes were made to the functionality.	2024-12-29 17:01:26 +01:00
DevTekVE	839a7a58e0	Refactoring model handling in modeld.py with ModelRunner abstraction A significant refactoring of `modeld.py` was performed to enhance the handling of model logic. A new abstraction called `ModelRunner` has been introduced which encapsulates the model-running logic. This refactor simplifies the `modeld.py` script and provides easier management across different hardware configurations. Using this segregation, varying processing methods for models can be handled distinctly ensuring cleaner and more maintainable code. An instance of the appropriate model runner is now created during initialization based on whether a TICI hardware or a different type is used.	2024-12-29 16:59:10 +01:00