using lfs instead

initial data
update submodule
2026-06-20 09:12:05 +08:00 · 2024-12-19 22:09:29 +01:00 · 2024-12-19 22:05:22 +01:00 · 2024-12-19 22:02:27 +01:00 · 2024-12-19 22:00:06 +01:00 · 2024-12-19 21:57:32 +01:00
119 changed files with 1153 additions and 1866 deletions
@@ -9,6 +9,7 @@
 *.ttf filter=lfs diff=lfs merge=lfs -text
 *.wav filter=lfs diff=lfs merge=lfs -text

+selfdrive/test/process_replay/fakedata/*.zst filter=lfs diff=lfs merge=lfs -text
 selfdrive/car/tests/test_models_segs.txt filter=lfs diff=lfs merge=lfs -text
 system/hardware/tici/updater filter=lfs diff=lfs merge=lfs -text
 selfdrive/ui/qt/spinner_larch64 filter=lfs diff=lfs merge=lfs -text
@@ -24,7 +24,7 @@ jobs:
    # Check PR target branch
    - name: check branch
      uses: Vankka/pr-target-branch-action@def32ec9d93514138d6ac0132ee62e120a72aed5
-      if: github.repository == 'commaai/openpilot'
+      if: github.repository == 'sunnypilot/sunnypilot'
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      with:
@@ -37,17 +37,17 @@ jobs:
    # Welcome comment
    - name: "First timers PR"
      uses: actions/first-interaction@v1
-      if: github.event.pull_request.head.repo.full_name != 'commaai/openpilot'
+      if: github.event.pull_request.head.repo.full_name != 'sunnypilot/sunnypilot'
      with:
        repo-token: ${{ secrets.GITHUB_TOKEN }}
        pr-message: |
            <!-- _(run_id **${{ github.run_id }}**)_ -->
            Thanks for contributing to openpilot! In order for us to review your PR as quickly as possible, check the following:
            * Convert your PR to a draft unless it's ready to review
-            * Read the [contributing docs](https://github.com/commaai/openpilot/blob/master/docs/CONTRIBUTING.md)
+            * Read the [contributing docs](https://github.com/sunnypilot/sunnypilot/blob/master/docs/CONTRIBUTING.md)
            * Before marking as "ready for review", ensure:
              * the goal is clearly stated in the description
              * all the tests are passing
-              * the change is [something we merge](https://github.com/commaai/openpilot/blob/master/docs/CONTRIBUTING.md#what-gets-merged)
+              * the change is [something we merge](https://github.com/sunnypilot/sunnypilot/blob/master/docs/CONTRIBUTING.md#what-gets-merged)
              * include a route or your device' dongle ID if relevant
-      
+
@@ -13,7 +13,7 @@ jobs:
  badges:
    name: create badges
    runs-on: ubuntu-latest
-    if: github.repository == 'commaai/openpilot'
+    if: github.repository == 'sunnypilot/sunnypilot'
    permissions:
      contents: write
    steps:
@@ -29,7 +29,7 @@ jobs:

        git checkout --orphan badges
        git rm -rf --cached .
-        git config user.email "badge-researcher@comma.ai"
+        git config user.email "badge-researcher@sunnypilot.ai"
        git config user.name "Badge Researcher"

        git add translation_badge.svg
@@ -15,7 +15,7 @@ env:

 jobs:
  setup:
-    if: github.repository == 'commaai/openpilot'
+    if: github.repository == 'sunnypilot/sunnypilot'
    runs-on: ubuntu-latest
    outputs:
      ci_runs: ${{ steps.ci_runs_setup.outputs.matrix }}
@@ -31,7 +31,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix: ${{fromJSON(needs.setup.outputs.ci_runs)}}
-    uses: commaai/openpilot/.github/workflows/ci_weekly_run.yaml@master
+    uses: sunnypilot/sunnypilot/.github/workflows/ci_weekly_run.yaml@master
    with:
      run_number: ${{ matrix.run_number }}

@@ -12,6 +12,6 @@ concurrency:

 jobs:
  selfdrive_tests:
-    uses: commaai/openpilot/.github/workflows/selfdrive_tests.yaml@master
+    uses: sunnypilot/sunnypilot/.github/workflows/selfdrive_tests.yaml@master
    with:
      run_number: ${{ inputs.run_number }}
@@ -15,7 +15,7 @@ runs:
                          scons -j$(nproc) --cache-populate"
    - name: Save scons cache
      uses: actions/cache/save@v4
-      if: github.ref == 'refs/heads/master'
+      if: (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/master-new')
      with:
        path: .ci_cache/scons_cache
        key: scons-${{ runner.arch }}-${{ env.CACHE_COMMIT_DATE }}-${{ github.sha }}
@@ -18,7 +18,7 @@ concurrency:
 jobs:
  docs:
    name: build docs
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
    steps:
    - uses: commaai/timeout@v1

@@ -35,13 +35,13 @@ jobs:

    # Push to docs.comma.ai
    - uses: actions/checkout@v4
-      if: github.ref == 'refs/heads/master' && github.repository == 'commaai/openpilot'
+      if: github.ref == 'refs/heads/master' && github.repository == 'sunnypilot/sunnypilot'
      with:
        path: openpilot-docs
        ssh-key: ${{ secrets.OPENPILOT_DOCS_KEY }}
-        repository: commaai/openpilot-docs
+        repository: sunnypilot/sunnypilot-docs
    - name: Push
-      if: github.ref == 'refs/heads/master' && github.repository == 'commaai/openpilot'
+      if: github.ref == 'refs/heads/master' && github.repository == 'sunnypilot/sunnypilot'
      run: |
        set -x

@@ -0,0 +1,72 @@
+name: Sync comma's LFS
+
+# Checks out commaai/openpilot, fetches its Git LFS objects, then rewrites
+# .lfsconfig to point at the LFS_URL / LFS_PUSH_URL below and pushes the
+# objects there.
+env:
+  LFS_URL: 'https://gitlab.com/sunnypilot/public/sunnypilot-new-lfs.git/info/lfs'
+  LFS_PUSH_URL: 'ssh://git@gitlab.com/sunnypilot/public/sunnypilot-new-lfs.git'
+
+on:
+  schedule:
+    - cron: '0 0 * * *'  # Runs at 00:00 UTC every day
+  push:
+    branches:
+      - 'master-new'
+  pull_request:
+    branches:
+      - 'master-new'
+  workflow_dispatch:  # enables manual triggering
+    inputs:
+      upstream_branch:
+        default: 'master'
+        type: string
+
+jobs:
+  sync:
+    runs-on: ubuntu-latest
+    # Skip if PR is in draft mode
+    if: github.event_name != 'pull_request' || (github.event_name == 'pull_request' && github.event.pull_request.draft == false)
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          repository: 'commaai/openpilot'
+          # `inputs` is only populated for workflow_dispatch; fall back to the
+          # same default explicitly for schedule / push / pull_request runs.
+          ref: ${{ inputs.upstream_branch || 'master' }}
+
+      # Runs before .lfsconfig is rewritten, so it uses the checked-out
+      # repository's own LFS configuration.
+      - name: LFS Fetch
+        run: |
+          git lfs fetch
+
+      - name: Set up Git
+        run: |
+          git config --global user.name 'GitHub Action'
+          git config --global user.email 'action@github.com'
+
+      - name: Set up SSH
+        uses: webfactory/ssh-agent@v0.9.0
+        with:
+          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
+
+      - name: Add GitLab public keys
+        run: |
+          ssh-keyscan -H gitlab.com >> ~/.ssh/known_hosts
+
+      # Create a branch when the checkout left HEAD detached.
+      - name: Ensure branch
+        run: |
+          if git symbolic-ref -q HEAD >/dev/null; then
+            echo "Already on a branch, proceeding with push"
+          else
+            echo "Detached HEAD state detected, creating temporary branch"
+            git checkout -b temp_branch
+          fi
+
+      # Overwrite the checked-out .lfsconfig with the URLs from `env` above.
+      - name: Update LFS Config
+        run: |
+          echo '[lfs]' > .lfsconfig
+          echo '    url = ${{ env.LFS_URL }}' >> .lfsconfig
+          echo '    pushurl = ${{ env.LFS_PUSH_URL }}' >> .lfsconfig
+          echo '    locksverify = false' >> .lfsconfig
+
+      - name: Push LFS
+        id: sync-and-commit
+        run: |
+          git lfs ls-files -l
+          git lfs push --all origin
@@ -12,7 +12,7 @@ jobs:
  build_prebuilt:
    name: build prebuilt
    runs-on: ubuntu-latest
-    if: github.repository == 'commaai/openpilot'
+    if: github.repository == 'sunnypilot/sunnypilot'
    env:
      PUSH_IMAGE: true
    permissions:
@@ -13,7 +13,7 @@ jobs:
    container:
      image: ghcr.io/commaai/openpilot-base:latest
    runs-on: ubuntu-latest
-    if: github.repository == 'commaai/openpilot'
+    if: github.repository == 'sunnypilot/sunnypilot'
    permissions:
      checks: read
      contents: write
@@ -11,7 +11,7 @@ jobs:
    runs-on: ubuntu-latest
    container:
      image: ghcr.io/commaai/openpilot-base:latest
-    if: github.repository == 'commaai/openpilot'
+    if: github.repository == 'sunnypilot/sunnypilot'
    steps:
    - uses: actions/checkout@v4
      with:
@@ -4,6 +4,7 @@ on:
  push:
    branches:
      - master
+      - master-new
  pull_request:
  workflow_dispatch:
  workflow_call:
@@ -14,10 +15,11 @@ on:
        type: string

 concurrency:
-  group: selfdrive-tests-ci-run-${{ inputs.run_number }}-${{ github.event_name == 'push' && github.ref == 'refs/heads/master' && github.run_id || github.head_ref || github.ref }}-${{ github.workflow }}-${{ github.event_name }}
+  group: selfdrive-tests-ci-run-${{ inputs.run_number }}-${{ github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/master-new') && github.run_id || github.head_ref || github.ref }}-${{ github.workflow }}-${{ github.event_name }}
  cancel-in-progress: true

 env:
+  REPORT_NAME: report-${{ inputs.run_number || '1' }}-${{ github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/master-new') && 'master' || github.event.number }}
  PYTHONWARNINGS: error
  BASE_IMAGE: openpilot-base
  AZURE_TOKEN: ${{ secrets.AZURE_COMMADATACI_OPENPILOTCI_TOKEN }}
@@ -31,6 +33,7 @@ env:

 jobs:
  build_release:
+    if: github.repository == 'commaai/openpilot' # build_release is restricted to commaai/openpilot for the time being, as we may use a different release process.
    name: build release
    runs-on:
      - ${{ ((github.repository == 'commaai/openpilot') && ((github.event_name != 'pull_request') || (github.event.pull_request.head.repo.full_name == 'commaai/openpilot'))) && 'namespace-profile-amd64-8x16' || 'ubuntu-24.04' }}
@@ -52,7 +55,7 @@ jobs:
      run: TARGET_DIR=$STRIPPED_DIR release/build_devel.sh
    - uses: ./.github/workflows/setup-with-retry
    - name: Check submodules
-      if: github.repository == 'commaai/openpilot'
+      if: github.repository == 'sunnypilot/sunnypilot'
      timeout-minutes: 3
      run: release/check-submodules.sh
    - name: Build openpilot and run checks
@@ -92,8 +95,11 @@ jobs:
      timeout-minutes: 30

  build_mac:
+    if: github.repository == 'commaai/openpilot' # Blocking macOS builds as well, since they carry a 10x multiplier on GitHub Actions minutes, which is far too much.
    name: build macOS
-    runs-on: ${{ github.repository == 'commaai/openpilot' && 'namespace-profile-macos-8x14' || 'macos-latest' }}
+    runs-on: ${{ ((github.repository == 'commaai/openpilot') &&
+                   ((github.event_name != 'pull_request') ||
+                    (github.event.pull_request.head.repo.full_name == 'commaai/openpilot'))) && 'namespace-profile-macos-8x14' || 'macos-latest' }}
    steps:
    - uses: actions/checkout@v4
      with:
@@ -102,6 +108,7 @@ jobs:
      uses: ./.github/workflows/auto-cache
      with:
        path: ~/Library/Caches/Homebrew
+        key: build_macos_${{ hashFiles('.github/workflows/selfdrive_tests.yaml') }}
    - name: Install dependencies
      run: ./tools/mac_setup.sh
      env:
@@ -113,6 +120,7 @@ jobs:
      uses: ./.github/workflows/auto-cache
      with:
        path: /tmp/scons_cache
+        key: build_macos_${{ hashFiles('.github/workflows/selfdrive_tests.yaml') }}
    - name: Building openpilot
      run: . .venv/bin/activate && scons -j$(nproc)

@@ -231,7 +239,7 @@ jobs:
      uses: actions/cache@v4
      with:
        path: .ci_cache/comma_download_cache
-        key: car_models-${{ hashFiles('selfdrive/car/tests/test_models.py', 'selfdrive/car/tests/routes.py') }}-${{ matrix.job }}
+        key: car_models-${{ hashFiles('selfdrive/car/tests/test_models.py', 'opendbc/car/tests/routes.py') }}-${{ matrix.job }}
    - name: Build openpilot
      run: ${{ env.RUN }} "scons -j$(nproc)"
    - name: Test car models
@@ -309,6 +317,7 @@ jobs:
    runs-on:
      - ${{ ((github.repository == 'commaai/openpilot') && ((github.event_name != 'pull_request') || (github.event.pull_request.head.repo.full_name == 'commaai/openpilot'))) && 'namespace-profile-amd64-8x16' || 'ubuntu-24.04' }}
      - ${{ ((github.repository == 'commaai/openpilot') && ((github.event_name != 'pull_request') || (github.event.pull_request.head.repo.full_name == 'commaai/openpilot'))) && 'namespace-experiments:docker.builds.local-cache=separate' || 'ubuntu-24.04' }}
+    if: (github.repository == 'commaai/openpilot') && ((github.event_name != 'pull_request') || (github.event.pull_request.head.repo.full_name == 'commaai/openpilot'))
    steps:
    - uses: actions/checkout@v4
      with:
@@ -353,5 +362,5 @@ jobs:
      - name: Upload Test Report
        uses: actions/upload-artifact@v4
        with:
-          name: report-${{ inputs.run_number || '1' }}-${{ github.event_name == 'push' && github.ref == 'refs/heads/master' && 'master' || github.event.number }}
+          name: ${{ env.REPORT_NAME }}
          path: selfdrive/ui/tests/test_ui/report_1/screenshots
@@ -20,7 +20,7 @@ jobs:
          stale-pr-message: 'This PR has had no activity for ${{ env.DAYS_BEFORE_PR_STALE }} days. It will be automatically closed in ${{ env.DAYS_BEFORE_PR_CLOSE }} days if there is no activity.'
          close-pr-message: 'This PR has been automatically closed due to inactivity. Feel free to re-open once activity resumes.'
          stale-pr-label: stale
-          delete-branch: ${{ github.event.pull_request.head.repo.full_name == 'commaai/openpilot' }} # only delete branches on the main repo
+          delete-branch: ${{ github.event.pull_request.head.repo.full_name == 'sunnypilot/sunnypilot' }} # only delete branches on the main repo
          exempt-pr-labels: "ignore stale,needs testing" # if wip or it needs testing from the community, don't mark as stale
          days-before-pr-stale: ${{ env.DAYS_BEFORE_PR_STALE }}
          days-before-pr-close: ${{ env.DAYS_BEFORE_PR_CLOSE }}
@@ -3,23 +3,25 @@ on:
  push:
    branches:
      - master
+      - master-new
  pull_request_target:
    types: [assigned, opened, synchronize, reopened, edited]
    branches:
      - 'master'
+      - 'master-new'
    paths:
      - 'selfdrive/ui/**'
  workflow_dispatch:

 env:
  UI_JOB_NAME: "Create UI Report"
-  REPORT_NAME: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' && 'master' || github.event.number }}
-  SHA: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' && github.sha || github.event.pull_request.head.sha }}
+  REPORT_NAME: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/master-new') && 'master' || github.event.number }}
+  SHA: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/master-new') && github.sha || github.event.pull_request.head.sha }}
  BRANCH_NAME: "openpilot/pr-${{ github.event.number }}"

 jobs:
  preview:
-    if: github.repository == 'commaai/openpilot'
+    if: github.repository == 'sunnypilot/sunnypilot'
    name: preview
    runs-on: ubuntu-latest
    timeout-minutes: 20
@@ -58,13 +60,13 @@ jobs:
      - name: Getting master ui
        uses: actions/checkout@v4
        with:
-          repository: commaai/ci-artifacts
+          repository: sunnypilot/ci-artifacts
          ssh-key: ${{ secrets.CI_ARTIFACTS_DEPLOY_KEY }}
          path: ${{ github.workspace }}/master_ui
          ref: openpilot_master_ui

      - name: Saving new master ui
-        if: github.ref == 'refs/heads/master' && github.event_name == 'push'
+        if: (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/master-new') && github.event_name == 'push'
        working-directory: ${{ github.workspace }}/master_ui
        run: |
          git checkout --orphan=new_master_ui
@@ -106,13 +108,13 @@ jobs:
              DIFF="${DIFF}<table>"

              DIFF="${DIFF}<tr>"
-              DIFF="${DIFF}  <td> master <img src=\"https://raw.githubusercontent.com/commaai/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}_master_ref.png\"> </td>"
-              DIFF="${DIFF}  <td> proposed <img src=\"https://raw.githubusercontent.com/commaai/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}.png\"> </td>"
+              DIFF="${DIFF}  <td> master <img src=\"https://raw.githubusercontent.com/sunnypilot/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}_master_ref.png\"> </td>"
+              DIFF="${DIFF}  <td> proposed <img src=\"https://raw.githubusercontent.com/sunnypilot/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}.png\"> </td>"
              DIFF="${DIFF}</tr>"

              DIFF="${DIFF}<tr>"
-              DIFF="${DIFF}  <td> diff <img src=\"https://raw.githubusercontent.com/commaai/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}_diff.png\"> </td>"
-              DIFF="${DIFF}  <td> composite diff <img src=\"https://raw.githubusercontent.com/commaai/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}_diff.gif\"> </td>"
+              DIFF="${DIFF}  <td> diff <img src=\"https://raw.githubusercontent.com/sunnypilot/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}_diff.png\"> </td>"
+              DIFF="${DIFF}  <td> composite diff <img src=\"https://raw.githubusercontent.com/sunnypilot/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}_diff.gif\"> </td>"
              DIFF="${DIFF}</tr>"

              DIFF="${DIFF}</table>"
@@ -125,7 +127,7 @@ jobs:
            if [[ $INDEX -eq 0 ]]; then
              TABLE="${TABLE}<tr>"
            fi
-            TABLE="${TABLE}  <td> <img src=\"https://raw.githubusercontent.com/commaai/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}.png\"> </td>"
+            TABLE="${TABLE}  <td> <img src=\"https://raw.githubusercontent.com/sunnypilot/ci-artifacts/${{ env.BRANCH_NAME }}/${A[$i]}.png\"> </td>"
            if [[ $INDEX -eq 1 || $(($i + 1)) -eq ${#A[*]} ]]; then
              TABLE="${TABLE}</tr>"
            fi
@@ -1,18 +1,21 @@
 [submodule "panda"]
  path = panda
-  url = ../../commaai/panda.git
+  url = https://github.com/sunnyhaibin/panda.git
 [submodule "opendbc"]
  path = opendbc_repo
-  url = ../../commaai/opendbc.git
+  url = https://github.com/sunnypilot/opendbc.git
 [submodule "msgq"]
  path = msgq_repo
-  url = ../../commaai/msgq.git
+  url = https://github.com/sunnypilot/msgq.git
 [submodule "rednose_repo"]
  path = rednose_repo
-  url = ../../commaai/rednose.git
+  url = https://github.com/commaai/rednose.git
 [submodule "teleoprtc_repo"]
  path = teleoprtc_repo
-  url = ../../commaai/teleoprtc
+  url = https://github.com/commaai/teleoprtc
 [submodule "tinygrad"]
  path = tinygrad_repo
-  url = https://github.com/commaai/tinygrad.git
+  url = https://github.com/tinygrad/tinygrad.git
+[submodule "fakedata"]
+  path = selfdrive/test/process_replay/fakedata
+  url = https://github.com/sunnypilot/fakedata.git
@@ -1,4 +1,4 @@
 [lfs]
-	url = https://gitlab.com/commaai/openpilot-lfs.git/info/lfs
-	pushurl = ssh://git@gitlab.com/commaai/openpilot-lfs.git
+	url = https://gitlab.com/sunnypilot/public/sunnypilot-new-lfs.git/info/lfs
+	pushurl = ssh://git@gitlab.com/sunnypilot/public/sunnypilot-new-lfs.git
 	locksverify = false
@@ -0,0 +1,4 @@
+[lfs]
+	url = https://gitlab.com/commaai/openpilot-lfs.git/info/lfs
+	pushurl = ssh://git@gitlab.com/commaai/openpilot-lfs.git
+	locksverify = false
@@ -1,9 +1,9 @@
 FROM ghcr.io/commaai/openpilot-base:latest

-ENV PYTHONUNBUFFERED 1
+ENV PYTHONUNBUFFERED=1

-ENV OPENPILOT_PATH /home/batman/openpilot
-ENV PYTHONPATH ${OPENPILOT_PATH}:${PYTHONPATH}
+ENV OPENPILOT_PATH=/home/batman/openpilot
+ENV PYTHONPATH=${OPENPILOT_PATH}:${PYTHONPATH}

 RUN mkdir -p ${OPENPILOT_PATH}
 WORKDIR ${OPENPILOT_PATH}
@@ -1,16 +1,16 @@
 FROM ubuntu:24.04

-ENV PYTHONUNBUFFERED 1
+ENV PYTHONUNBUFFERED=1

 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && \
-    apt-get install -y --no-install-recommends sudo tzdata locales ssh pulseaudio xvfb x11-xserver-utils gnome-screenshot && \
+    apt-get install -y --no-install-recommends sudo tzdata locales ssh pulseaudio xvfb x11-xserver-utils gnome-screenshot python3-tk python3-dev && \
    rm -rf /var/lib/apt/lists/*

 RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && locale-gen
-ENV LANG en_US.UTF-8
-ENV LANGUAGE en_US:en
-ENV LC_ALL en_US.UTF-8
+ENV LANG=en_US.UTF-8
+ENV LANGUAGE=en_US:en
+ENV LC_ALL=en_US.UTF-8

 COPY tools/install_ubuntu_dependencies.sh /tmp/tools/
 RUN /tmp/tools/install_ubuntu_dependencies.sh && \
@@ -55,9 +55,9 @@ RUN mkdir -p /tmp/opencl-driver-intel && \
    cd / && \
    rm -rf /tmp/opencl-driver-intel

-ENV NVIDIA_VISIBLE_DEVICES all
-ENV NVIDIA_DRIVER_CAPABILITIES graphics,utility,compute
-ENV QTWEBENGINE_DISABLE_SANDBOX 1
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=graphics,utility,compute
+ENV QTWEBENGINE_DISABLE_SANDBOX=1

 RUN dbus-uuidgen > /etc/machine-id

@@ -0,0 +1,21 @@
+# Custom MIT License
+
+Copyright (c) 2024, Haibin Wen, SUNNYPILOT LLC
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to view and modify the Software, subject to the following conditions:
+
+1. **Permission Required**: Permission Required for Commercial, For-Profit, or Closed Source Use: Use of the Software, in whole or in part, for any commercial purposes, for-profit projects, or in closed source projects requires explicit written permission from the original author(s).
+
+2. **Redistribution**: Any redistribution of the Software, modified or unmodified, must retain this license notice and the following acknowledgment:
+   "This software is licensed under a custom license requiring permission for use."
+
+3. **Visibility**: Any project that uses the Software must visibly mention the following acknowledgment:
+   "This project uses software from Haibin Wen and SUNNYPILOT LLC and is licensed under a custom license requiring permission for use."
+
+4. **No Warranty**: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+Contact sunnypilot Support <support@sunnypilot.ai> for permission requests.
+
+---
+
+Haibin Wen, SUNNYPILOT LLC
@@ -38,7 +38,8 @@ Quick start: `bash <(curl -fsSL openpilot.comma.ai)`
  </tr>
 </table>

-To start using openpilot in a car
+
+Using openpilot in a car
 ------

 To use openpilot in a car, you need four things:
@@ -49,6 +50,14 @@ To use openpilot in a car, you need four things:

 We have detailed instructions for [how to install the harness and device in a car](https://comma.ai/setup). Note that it's possible to run openpilot on [other hardware](https://blog.comma.ai/self-driving-car-for-free/), although it's not plug-and-play.

+### Branches
+| branch           | URL                                    | description                                                                         |
+|------------------|----------------------------------------|-------------------------------------------------------------------------------------|
+| `release3`         | openpilot.comma.ai                      | This is openpilot's release branch.                                                 |
+| `release3-staging` | openpilot-test.comma.ai                | This is the staging branch for releases. Use it to get new releases slightly early. |
+| `nightly`          | openpilot-nightly.comma.ai             | This is the bleeding edge development branch. Do not expect this to be stable.      |
+| `nightly-dev`      | installer.comma.ai/commaai/nightly-dev | Same as nightly, but includes experimental development features for some cars.      |
+
 To start developing openpilot
 ------

@@ -2440,6 +2440,14 @@ struct Microphone {
  filteredSoundPressureWeightedDb @2 :Float32;
 }

+# One entry of the `touch` event list.
+# NOTE(review): the sec/usec + type/code/value layout resembles a Linux
+# input_event record; confirm against the code that publishes it.
+struct Touch {
+  sec @0 :Int64;
+  usec @1 :Int64;
+  type @2 :UInt8;
+  code @3 :Int32;
+  value @4 :Int32;
+}
+
 struct Event {
  logMonoTime @0 :UInt64;  # nanoseconds
  valid @67 :Bool = true;
@@ -2520,6 +2528,9 @@ struct Event {
    logMessage @18 :Text;
    errorLogMessage @85 :Text;

+    # touch frame
+    touch @135 :List(Touch);
+
    # navigation
    navInstruction @82 :NavInstruction;
    navRoute @83 :NavRoute;
@@ -22,6 +22,7 @@ _services: dict[str, tuple] = {
  "temperatureSensor2": (True, 2., 200),
  "gpsNMEA": (True, 9.),
  "deviceState": (True, 2., 1),
+  "touch": (True, 20., 1),
  "can": (True, 100., 2053),  # decimation gives ~3 msgs in a full segment
  "controlsState": (True, 100., 10),
  "selfdriveState": (True, 100., 10),
@@ -1,7 +1,7 @@
 import numpy as np

 from openpilot.common.transformations.orientation import rot_from_euler
-from openpilot.common.transformations.camera import get_view_frame_from_calib_frame, view_frame_from_device_frame
+from openpilot.common.transformations.camera import get_view_frame_from_calib_frame, view_frame_from_device_frame, _ar_ox_fisheye

 # segnet
 SEGNET_SIZE = (512, 384)
@@ -39,6 +39,13 @@ sbigmodel_intrinsics = np.array([
  [0.0,  sbigmodel_fl,      0.5 * (256 + MEDMODEL_CY)],
  [0.0,  0.0,                                     1.0]])

+DM_INPUT_SIZE = (1440, 960)
+dmonitoringmodel_fl = _ar_ox_fisheye.focal_length
+dmonitoringmodel_intrinsics = np.array([
+  [dmonitoringmodel_fl,  0.0, DM_INPUT_SIZE[0]/2],
+  [0.0, dmonitoringmodel_fl, DM_INPUT_SIZE[1]/2 - (_ar_ox_fisheye.height - DM_INPUT_SIZE[1])/2],
+  [0.0,  0.0, 1.0]])
+
 bigmodel_frame_from_calib_frame = np.dot(bigmodel_intrinsics,
  get_view_frame_from_calib_frame(0, 0, 0, 0))

@@ -103,7 +103,7 @@ A supported vehicle is one that just works when you install a comma device. All
 |Hyundai|Ioniq Plug-in Hybrid 2020-22|All|openpilot available[<sup>1</sup>](#footnotes)|0 mph|0 mph|[![star](assets/icon-star-full.svg)](##)|[![star](assets/icon-star-full.svg)](##)|<details><summary>Parts</summary><sub>- 1 Hyundai H connector<br>- 1 RJ45 cable (7 ft)<br>- 1 comma 3X<br>- 1 comma power v2<br>- 1 harness box<br>- 1 mount<br>- 1 right angle OBD-C cable (1.5 ft)<br><a href="https://comma.ai/shop/comma-3x.html?make=Hyundai&model=Ioniq Plug-in Hybrid 2020-22">Buy Here</a></sub></details>||
 |Hyundai|Kona 2020|Smart Cruise Control (SCC)|openpilot available[<sup>1</sup>](#footnotes)|6 mph|0 mph|[![star](assets/icon-star-full.svg)](##)|[![star](assets/icon-star-empty.svg)](##)|<details><summary>Parts</summary><sub>- 1 Hyundai B connector<br>- 1 RJ45 cable (7 ft)<br>- 1 comma 3X<br>- 1 comma power v2<br>- 1 harness box<br>- 1 mount<br>- 1 right angle OBD-C cable (1.5 ft)<br><a href="https://comma.ai/shop/comma-3x.html?make=Hyundai&model=Kona 2020">Buy Here</a></sub></details>||
 |Hyundai|Kona Electric 2018-21|Smart Cruise Control (SCC)|openpilot available[<sup>1</sup>](#footnotes)|0 mph|0 mph|[![star](assets/icon-star-full.svg)](##)|[![star](assets/icon-star-full.svg)](##)|<details><summary>Parts</summary><sub>- 1 Hyundai G connector<br>- 1 RJ45 cable (7 ft)<br>- 1 comma 3X<br>- 1 comma power v2<br>- 1 harness box<br>- 1 mount<br>- 1 right angle OBD-C cable (1.5 ft)<br><a href="https://comma.ai/shop/comma-3x.html?make=Hyundai&model=Kona Electric 2018-21">Buy Here</a></sub></details>||
-|Hyundai|Kona Electric 2022-23|Smart Cruise Control (SCC)|Stock|0 mph|0 mph|[![star](assets/icon-star-full.svg)](##)|[![star](assets/icon-star-full.svg)](##)|<details><summary>Parts</summary><sub>- 1 Hyundai O connector<br>- 1 RJ45 cable (7 ft)<br>- 1 comma 3X<br>- 1 comma power v2<br>- 1 harness box<br>- 1 mount<br>- 1 right angle OBD-C cable (1.5 ft)<br><a href="https://comma.ai/shop/comma-3x.html?make=Hyundai&model=Kona Electric 2022-23">Buy Here</a></sub></details>||
+|Hyundai|Kona Electric 2022-23|Smart Cruise Control (SCC)|openpilot available[<sup>1</sup>](#footnotes)|0 mph|0 mph|[![star](assets/icon-star-full.svg)](##)|[![star](assets/icon-star-full.svg)](##)|<details><summary>Parts</summary><sub>- 1 Hyundai O connector<br>- 1 RJ45 cable (7 ft)<br>- 1 comma 3X<br>- 1 comma power v2<br>- 1 harness box<br>- 1 mount<br>- 1 right angle OBD-C cable (1.5 ft)<br><a href="https://comma.ai/shop/comma-3x.html?make=Hyundai&model=Kona Electric 2022-23">Buy Here</a></sub></details>||
 |Hyundai|Kona Electric (with HDA II, Korea only) 2023[<sup>5</sup>](#footnotes)|Smart Cruise Control (SCC)|Stock|0 mph|0 mph|[![star](assets/icon-star-full.svg)](##)|[![star](assets/icon-star-full.svg)](##)|<details><summary>Parts</summary><sub>- 1 Hyundai R connector<br>- 1 RJ45 cable (7 ft)<br>- 1 comma 3X<br>- 1 comma power v2<br>- 1 harness box<br>- 1 mount<br>- 1 right angle OBD-C cable (1.5 ft)<br><a href="https://comma.ai/shop/comma-3x.html?make=Hyundai&model=Kona Electric (with HDA II, Korea only) 2023">Buy Here</a></sub></details>|<a href="https://www.youtube.com/watch?v=U2fOCmcQ8hw" target="_blank"><img height="18px" src="assets/icon-youtube.svg"></img></a>|
 |Hyundai|Kona Hybrid 2020|Smart Cruise Control (SCC)|openpilot available[<sup>1</sup>](#footnotes)|0 mph|0 mph|[![star](assets/icon-star-full.svg)](##)|[![star](assets/icon-star-full.svg)](##)|<details><summary>Parts</summary><sub>- 1 Hyundai I connector<br>- 1 RJ45 cable (7 ft)<br>- 1 comma 3X<br>- 1 comma power v2<br>- 1 harness box<br>- 1 mount<br>- 1 right angle OBD-C cable (1.5 ft)<br><a href="https://comma.ai/shop/comma-3x.html?make=Hyundai&model=Kona Hybrid 2020">Buy Here</a></sub></details>||
 |Hyundai|Palisade 2020-22|All|openpilot available[<sup>1</sup>](#footnotes)|0 mph|0 mph|[![star](assets/icon-star-full.svg)](##)|[![star](assets/icon-star-full.svg)](##)|<details><summary>Parts</summary><sub>- 1 Hyundai H connector<br>- 1 RJ45 cable (7 ft)<br>- 1 comma 3X<br>- 1 comma power v2<br>- 1 harness box<br>- 1 mount<br>- 1 right angle OBD-C cable (1.5 ft)<br><a href="https://comma.ai/shop/comma-3x.html?make=Hyundai&model=Palisade 2020-22">Buy Here</a></sub></details>|<a href="https://youtu.be/TAnDqjF4fDY?t=456" target="_blank"><img height="18px" src="assets/icon-youtube.svg"></img></a>|
@@ -0,0 +1,44 @@
+/* Host element: positioning context for the popup, with a dotted underline. */
+[data-tooltip] {
+  display: inline-block;
+  position: relative;
+  border-bottom: 1px dotted black;
+}
+
+/* Popup body: hidden by default, centred horizontally below the host. */
+[data-tooltip] .tooltip-content {
+  /* placement */
+  position: absolute;
+  top: 100%;
+  left: 50%;
+  transform: translateX(-50%);
+  z-index: 1000;
+
+  /* sizing */
+  width: max-content;
+  max-width: 25em;
+  padding: 10px;
+
+  /* appearance */
+  color: #404040;
+  background-color: white;
+  box-shadow: 0 4px 14px 0 rgba(0,0,0,.2), 0 0 0 1px rgba(0,0,0,.05);
+  font: 14px/1.5 Lato, proxima-nova, Helvetica Neue, Arial, sans-serif;
+  text-decoration: none;
+
+  /* hidden state */
+  visibility: hidden;
+  opacity: 0;
+  pointer-events: none; /* Prevent accidental interaction */
+  transition: opacity 0.1s, visibility 0s;
+}
+
+/* Reveal the popup while the host is hovered. */
+[data-tooltip]:hover .tooltip-content {
+  visibility: visible;
+  opacity: 1;
+  pointer-events: auto; /* Allow interaction when visible */
+}
+
+/* Glossary link rendered inside the popup. */
+.tooltip-content .tooltip-glossary-link {
+  display: inline-block;
+  margin-top: 8px;
+  color: #007bff;
+  font-size: 12px;
+  text-decoration: none;
+}
+
+.tooltip-content .tooltip-glossary-link:hover {
+  text-decoration: underline;
+  color: #0056b3;
+}
@@ -0,0 +1,68 @@
+import re
+import tomllib
+
+def load_glossary(file_path="docs/glossary.toml"):
+  with open(file_path, "rb") as f:
+    glossary_data = tomllib.load(f)
+  return glossary_data.get("glossary", {})
+
+def generate_anchor_id(name):
+  return name.replace(" ", "-").replace("_", "-").lower()
+
+def format_markdown_term(name, definition):
+  anchor_id = generate_anchor_id(name)
+  markdown = f"* [**{name.replace('_', ' ').title()}**](#{anchor_id})"
+  if definition.get("abbreviation"):
+    markdown += f" *({definition['abbreviation']})*"
+  if definition.get("description"):
+    markdown += f": {definition['description']}\n"
+  return markdown
+
+def glossary_markdown(vocabulary):
+  markdown = ""
+  for category, terms in vocabulary.items():
+    markdown += f"## {category.replace('_', ' ').title()}\n\n"
+    for name, definition in terms.items():
+      markdown += format_markdown_term(name, definition)
+  return markdown
+
+def format_tooltip_html(term_key, definition, html):
+  display_term = term_key.replace("_", " ").title()
+  clean_description = re.sub(r"\[(.+)]\(.+\)", r"\1", definition["description"])
+  glossary_link = (
+    f"<a href='/concepts/glossary#{term_key}' class='tooltip-glossary-link' title='View in glossary'>Glossary🔗</a>"
+  )
+  return re.sub(
+    re.escape(display_term),
+    lambda
+    match: f"<span data-tooltip>{match.group(0)}<span class='tooltip-content'>{clean_description} {glossary_link}</span></span>",
+    html,
+    flags=re.IGNORECASE,
+  )
+
+def apply_tooltip(_term_key, _definition, pattern, html):
+  return re.sub(
+    pattern,
+    lambda match: format_tooltip_html(_term_key, _definition, match.group(0)),
+    html,
+    flags=re.IGNORECASE,
+  )
+
+def tooltip_html(vocabulary, html):
+  for _category, terms in vocabulary.items():
+    for term_key, definition in terms.items():
+      if definition.get("description"):
+        pattern = rf"(?<!\w){re.escape(term_key.replace('_', ' ').title())}(?![^<]*<\/a>)(?!\([^)]*\))"
+        html = apply_tooltip(term_key, definition, pattern, html)
+  return html
+
+# Page Hooks
+def on_page_markdown(markdown, **kwargs):
+  glossary = load_glossary()
+  return markdown.replace("{{GLOSSARY_DEFINITIONS}}", glossary_markdown(glossary))
+
+def on_page_content(html, **kwargs):
+  if kwargs.get("page").title == "Glossary":
+    return html
+  glossary = load_glossary()
+  return tooltip_html(glossary, html)
@@ -7,7 +7,7 @@ export OPENBLAS_NUM_THREADS=1
 export VECLIB_MAXIMUM_THREADS=1

 if [ -z "$AGNOS_VERSION" ]; then
-  export AGNOS_VERSION="11.3"
+  export AGNOS_VERSION="11.4"
 fi

 export STAGING_ROOT="/data/safe_staging"
@@ -8,6 +8,10 @@ strict: true
 docs_dir: docs
 site_dir: docs_site/

+hooks:
+  - docs/hooks/glossary.py
+extra_css:
+  - css/tooltip.css
 theme:
  name: readthedocs
  navigation_depth: 3
@@ -0,0 +1 @@
+../sunnypilot
@@ -1,6 +1,6 @@
 [project]
 name = "openpilot"
-requires-python = ">= 3.11, <= 3.12"
+requires-python = ">= 3.11, < 3.13"
 license = {text = "MIT License"}
 version = "0.1.0"
 description = "an open source driver assistance system"
@@ -42,8 +42,7 @@ dependencies = [

  # modeld
  "onnx >= 1.14.0",
-  "onnxruntime >=1.16.3; platform_system == 'Linux' and platform_machine == 'aarch64'",
-  "onnxruntime-gpu >=1.16.3; platform_system == 'Linux' and platform_machine == 'x86_64'",
+  "onnxruntime >=1.16.3",

  # logging
  "pyzmq",
@@ -54,7 +54,7 @@ whitelist = [
  "tools/joystick/",
  "tools/longitudinal_maneuvers/",

-  "tinygrad_repo/openpilot/compile2.py",
+  "tinygrad_repo/examples/openpilot/compile3.py",
  "tinygrad_repo/extra/onnx.py",
  "tinygrad_repo/extra/onnx_ops.py",
  "tinygrad_repo/extra/thneed.py",
@@ -13,7 +13,7 @@ cd $ROOT

 FAILED=0

-IGNORED_FILES="uv\.lock|docs\/CARS.md"
+IGNORED_FILES="uv\.lock|docs\/CARS.md|LICENSE\.md|.*\.zst"
 IGNORED_DIRS="^third_party.*|^msgq.*|^msgq_repo.*|^opendbc.*|^opendbc_repo.*|^cereal.*|^panda.*|^rednose.*|^rednose_repo.*|^tinygrad.*|^tinygrad_repo.*|^teleoprtc.*|^teleoprtc_repo.*"

 function run() {
@@ -148,7 +148,8 @@ class CarSpecificEvents:
      # To avoid re-engaging when openpilot cancels, check user engagement intention via buttons
      # Main button also can trigger an engagement on these cars
      self.cruise_buttons.append(any(ev.type in HYUNDAI_ENABLE_BUTTONS for ev in CS.buttonEvents))
-      events = self.create_common_events(CS, CS_prev, pcm_enable=self.CP.pcmCruise, allow_enable=any(self.cruise_buttons))
+      events = self.create_common_events(CS, CS_prev, extra_gears=(GearShifter.sport, GearShifter.manumatic),
+                                         pcm_enable=self.CP.pcmCruise, allow_enable=any(self.cruise_buttons))

      # low speed steer alert hysteresis logic (only for cars with steer cut off above 10 m/s)
      if CS.vEgo < (self.CP.minSteerSpeed + 2.) and self.CP.minSteerSpeed > 10.:
@@ -5,12 +5,15 @@ from openpilot.common.realtime import DT_CTRL
 MIN_SPEED = 1.0
 CONTROL_N = 17
 CAR_ROTATION_RADIUS = 0.0
+# This is a turn radius smaller than most cars can achieve
+MAX_CURVATURE = 0.2

 # EU guidelines
 MAX_LATERAL_JERK = 5.0
 MAX_VEL_ERR = 5.0

 def clip_curvature(v_ego, prev_curvature, new_curvature):
+  new_curvature = clip(new_curvature, -MAX_CURVATURE, MAX_CURVATURE)
  v_ego = max(MIN_SPEED, v_ego)
  max_curvature_rate = MAX_LATERAL_JERK / (v_ego**2) # inexact calculation, check https://github.com/commaai/openpilot/pull/24755
  safe_desired_curvature = clip(new_curvature,
@@ -50,24 +50,20 @@ def limit_accel_in_turns(v_ego, angle_steers, a_target, CP):
  return [a_target[0], min(a_target[1], a_x_allowed)]


-def get_accel_from_plan(CP, speeds, accels):
+def get_accel_from_plan(speeds, accels, action_t=DT_MDL, vEgoStopping=0.05):
  if len(speeds) == CONTROL_N:
-    v_target_now = interp(DT_MDL, CONTROL_N_T_IDX, speeds)
-    a_target_now = interp(DT_MDL, CONTROL_N_T_IDX, accels)
+    v_now = speeds[0]
+    a_now = accels[0]

-    v_target = interp(CP.longitudinalActuatorDelay + DT_MDL, CONTROL_N_T_IDX, speeds)
-    if v_target != v_target_now:
-      a_target = 2 * (v_target - v_target_now) / CP.longitudinalActuatorDelay - a_target_now
-    else:
-      a_target = a_target_now
-
-    v_target_1sec = interp(CP.longitudinalActuatorDelay + DT_MDL + 1.0, CONTROL_N_T_IDX, speeds)
+    v_target = interp(action_t, CONTROL_N_T_IDX, speeds)
+    a_target = 2 * (v_target - v_now) / (action_t) - a_now
+    v_target_1sec = interp(action_t + 1.0, CONTROL_N_T_IDX, speeds)
  else:
    v_target = 0.0
    v_target_1sec = 0.0
    a_target = 0.0
-  should_stop = (v_target < CP.vEgoStopping and
-                 v_target_1sec < CP.vEgoStopping)
+  should_stop = (v_target < vEgoStopping and
+                 v_target_1sec < vEgoStopping)
  return a_target, should_stop


@@ -201,7 +197,9 @@ class LongitudinalPlanner:
    longitudinalPlan.longitudinalPlanSource = self.mpc.source
    longitudinalPlan.fcw = self.fcw

-    a_target, should_stop = get_accel_from_plan(self.CP, longitudinalPlan.speeds, longitudinalPlan.accels)
+    action_t =  self.CP.longitudinalActuatorDelay + DT_MDL
+    a_target, should_stop = get_accel_from_plan(longitudinalPlan.speeds, longitudinalPlan.accels,
+                                                action_t=action_t, vEgoStopping=self.CP.vEgoStopping)
    longitudinalPlan.aTarget = a_target
    longitudinalPlan.shouldStop = should_stop
    longitudinalPlan.allowBrake = True
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+import argparse
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+from openpilot.tools.lib.logreader import LogReader
+
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--width', default=2160, type=int)
+  parser.add_argument('--height', default=1080, type=int)
+  parser.add_argument('--route', default='rlog', type=str)
+  args = parser.parse_args()
+
+  w = args.width
+  h = args.height
+  route = args.route
+
+  fingers = [[-1, -1]] * 5
+  touch_points = []
+  current_slot = 0
+
+  lr = list(LogReader(route))
+  for msg in lr:
+    if msg.which() == 'touch':
+      for event in msg.touch:
+        if event.type == 3 and event.code == 47:
+          current_slot = event.value
+        elif event.type == 3 and event.code == 57 and event.value == -1:
+          fingers[current_slot] = [-1, -1]
+        elif event.type == 3 and event.code == 53:
+          fingers[current_slot][1] = h - (h - event.value)
+          if fingers[current_slot][0] != -1:
+            touch_points.append(fingers[current_slot].copy())
+        elif event.type == 3 and event.code == 54:
+          fingers[current_slot][0] = w - event.value
+          if fingers[current_slot][1] != -1:
+            touch_points.append(fingers[current_slot].copy())
+
+  if not touch_points:
+    print(f'No touch events found for {route}')
+    quit()
+
+  unique_points, counts = np.unique(touch_points, axis=0, return_counts=True)
+
+  plt.figure(figsize=(10, 3))
+  plt.scatter(unique_points[:, 0], unique_points[:, 1], c=counts, s=counts * 20, edgecolors='red')
+  plt.colorbar()
+  plt.title(f'Touches for {route}')
+  plt.xlim(0, w)
+  plt.ylim(0, h)
+  plt.grid(True)
+  plt.show()
@@ -13,20 +13,6 @@ common_src = [
  "transforms/transform.cc",
 ]

-thneed_src_common = [
-  "thneed/thneed_common.cc",
-  "thneed/serialize.cc",
-]
-
-thneed_src_qcom = thneed_src_common + ["thneed/thneed_qcom2.cc"]
-thneed_src_pc = thneed_src_common + ["thneed/thneed_pc.cc"]
-thneed_src = thneed_src_qcom if arch == "larch64" else thneed_src_pc
-
-# SNPE except on Mac and ARM Linux
-snpe_lib = []
-if arch != "Darwin" and arch != "aarch64":
-  common_src += ['runners/snpemodel.cc']
-  snpe_lib += ['SNPE']

 # OpenCL is a framework on Mac
 if arch == "Darwin":
@@ -45,34 +31,24 @@ snpe_rpath_pc = f"{Dir('#').abspath}/third_party/snpe/x86_64-linux-clang"
 snpe_rpath = lenvCython['RPATH'] + [snpe_rpath_qcom if arch == "larch64" else snpe_rpath_pc]

 cython_libs = envCython["LIBS"] + libs
-snpemodel_lib = lenv.Library('snpemodel', ['runners/snpemodel.cc'])
 commonmodel_lib = lenv.Library('commonmodel', common_src)
-
-lenvCython.Program('runners/runmodel_pyx.so', 'runners/runmodel_pyx.pyx', LIBS=cython_libs, FRAMEWORKS=frameworks)
-lenvCython.Program('runners/snpemodel_pyx.so', 'runners/snpemodel_pyx.pyx', LIBS=[snpemodel_lib, snpe_lib, *cython_libs], FRAMEWORKS=frameworks, RPATH=snpe_rpath)
 lenvCython.Program('models/commonmodel_pyx.so', 'models/commonmodel_pyx.pyx', LIBS=[commonmodel_lib, *cython_libs], FRAMEWORKS=frameworks)
-
-tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath)]
+tinygrad_files = ["#"+x for x in glob.glob(env.Dir("#tinygrad_repo").relpath + "/**", recursive=True, root_dir=env.Dir("#").abspath) if 'pycache' not in x]

 # Get model metadata
 fn = File("models/supercombo").abspath
 cmd = f'python3 {Dir("#selfdrive/modeld").abspath}/get_model_metadata.py {fn}.onnx'
 lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files, cmd)

-# Build thneed model
-if arch == "larch64" or GetOption('pc_thneed'):
-  tinygrad_opts = []
-  if not GetOption('pc_thneed'):
-    # use FLOAT16 on device for speed + don't cache the CL kernels for space
-    tinygrad_opts += ["FLOAT16=1", "PYOPENCL_NO_CACHE=1"]
-  cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn}.onnx {fn}.thneed"
+# Compile tinygrad model
+pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
+if arch == 'larch64':
+  device_string = 'QCOM=1'
+else:
+  device_string = 'CLANG=1 IMAGE=0'

-  lenv.Command(fn + ".thneed", [fn + ".onnx"] + tinygrad_files, cmd)
+for model_name in ['supercombo', 'dmonitoring_model']:
+  fn = File(f"models/{model_name}").abspath
+  cmd = f'{pythonpath_string} {device_string} python3 {Dir("#tinygrad_repo").abspath}/examples/openpilot/compile3.py {fn}.onnx {fn}_tinygrad.pkl'
+  lenv.Command(fn + "_tinygrad.pkl", [fn + ".onnx"] + tinygrad_files, cmd)

-  fn_dm = File("models/dmonitoring_model").abspath
-  cmd = f"cd {Dir('#').abspath}/tinygrad_repo && " + ' '.join(tinygrad_opts) + f" python3 openpilot/compile2.py {fn_dm}.onnx {fn_dm}.thneed"
-  lenv.Command(fn_dm + ".thneed", [fn_dm + ".onnx"] + tinygrad_files, cmd)
-
-  thneed_lib = env.SharedLibrary('thneed', thneed_src, LIBS=[gpucommon, common, 'OpenCL', 'dl'])
-  thneedmodel_lib = env.Library('thneedmodel', ['runners/thneedmodel.cc'])
-  lenvCython.Program('runners/thneedmodel_pyx.so', 'runners/thneedmodel_pyx.pyx', LIBS=envCython["LIBS"]+[thneedmodel_lib, thneed_lib, gpucommon, common, 'dl', 'OpenCL'])
@@ -1,10 +1,4 @@
 #!/usr/bin/env bash

 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
-cd "$DIR/../../"
-
-if [ -f "$DIR/libthneed.so" ]; then
-  export LD_PRELOAD="$DIR/libthneed.so"
-fi
-
 exec "$DIR/dmonitoringmodeld.py" "$@"
@@ -1,8 +1,17 @@
 #!/usr/bin/env python3
 import os
+from openpilot.system.hardware import TICI
+if TICI:
+  from tinygrad.tensor import Tensor
+  from tinygrad.dtype import dtypes
+  from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
+  os.environ['QCOM'] = '1'
+else:
+  from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner
 import gc
 import math
 import time
+import pickle
 import ctypes
 import numpy as np
 from pathlib import Path
@@ -13,21 +22,20 @@ from cereal.messaging import PubMaster, SubMaster
 from msgq.visionipc import VisionIpcClient, VisionStreamType, VisionBuf
 from openpilot.common.swaglog import cloudlog
 from openpilot.common.realtime import set_realtime_priority
-from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime
-from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext
+from openpilot.common.transformations.model import dmonitoringmodel_intrinsics, DM_INPUT_SIZE
+from openpilot.common.transformations.camera import _ar_ox_fisheye, _os_fisheye
+from openpilot.selfdrive.modeld.models.commonmodel_pyx import CLContext, MonitoringModelFrame
 from openpilot.selfdrive.modeld.parse_model_outputs import sigmoid

+MODEL_WIDTH, MODEL_HEIGHT = DM_INPUT_SIZE
 CALIB_LEN = 3
-MODEL_WIDTH = 1440
-MODEL_HEIGHT = 960
 FEATURE_LEN = 512
 OUTPUT_SIZE = 84 + FEATURE_LEN

 PROCESS_NAME = "selfdrive.modeld.dmonitoringmodeld"
 SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')
-MODEL_PATHS = {
-  ModelRunner.THNEED: Path(__file__).parent / 'models/dmonitoring_model.thneed',
-  ModelRunner.ONNX: Path(__file__).parent / 'models/dmonitoring_model.onnx'}
+MODEL_PATH = Path(__file__).parent / 'models/dmonitoring_model.onnx'
+MODEL_PKL_PATH = Path(__file__).parent / 'models/dmonitoring_model_tinygrad.pkl'

 class DriverStateResult(ctypes.Structure):
  _fields_ = [
@@ -58,33 +66,42 @@ class DMonitoringModelResult(ctypes.Structure):
 class ModelState:
  inputs: dict[str, np.ndarray]
  output: np.ndarray
-  model: ModelRunner

  def __init__(self, cl_ctx):
    assert ctypes.sizeof(DMonitoringModelResult) == OUTPUT_SIZE * ctypes.sizeof(ctypes.c_float)
-    self.output = np.zeros(OUTPUT_SIZE, dtype=np.float32)
-    self.inputs = {
-      'input_img': np.zeros(MODEL_HEIGHT * MODEL_WIDTH, dtype=np.uint8),
-      'calib': np.zeros(CALIB_LEN, dtype=np.float32)}

-    self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, cl_ctx)
-    self.model.addInput("input_img", None)
-    self.model.addInput("calib", self.inputs['calib'])
+    self.frame = MonitoringModelFrame(cl_ctx)
+    self.numpy_inputs = {
+      'calib': np.zeros((1, CALIB_LEN), dtype=np.float32),
+    }

-  def run(self, buf:VisionBuf, calib:np.ndarray) -> tuple[np.ndarray, float]:
-    self.inputs['calib'][:] = calib
+    if TICI:
+      self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
+      with open(MODEL_PKL_PATH, "rb") as f:
+        self.model_run = pickle.load(f)
+    else:
+      self.onnx_cpu_runner = make_onnx_cpu_runner(MODEL_PATH)

-    v_offset = buf.height - MODEL_HEIGHT
-    h_offset = (buf.width - MODEL_WIDTH) // 2
-    buf_data = buf.data.reshape(-1, buf.stride)
-    input_data = self.inputs['input_img'].reshape(MODEL_HEIGHT, MODEL_WIDTH)
-    input_data[:] = buf_data[v_offset:v_offset+MODEL_HEIGHT, h_offset:h_offset+MODEL_WIDTH]
+  def run(self, buf:VisionBuf, calib:np.ndarray, transform:np.ndarray) -> tuple[np.ndarray, float]:
+    self.numpy_inputs['calib'][0,:] = calib

-    self.model.setInputBuffer("input_img", self.inputs['input_img'].view(np.float32))
    t1 = time.perf_counter()
-    self.model.execute()
+
+    input_img_cl = self.frame.prepare(buf, transform.flatten())
+    if TICI:
+      # The imgs tensors are backed by opencl memory, only need init once
+      if 'input_img' not in self.tensor_inputs:
+        self.tensor_inputs['input_img'] = qcom_tensor_from_opencl_address(input_img_cl.mem_address, (1, MODEL_WIDTH*MODEL_HEIGHT), dtype=dtypes.uint8)
+    else:
+      self.numpy_inputs['input_img'] = self.frame.buffer_from_cl(input_img_cl).reshape((1, MODEL_WIDTH*MODEL_HEIGHT))
+
+    if TICI:
+      output = self.model_run(**self.tensor_inputs).numpy().flatten()
+    else:
+      output = self.onnx_cpu_runner.run(None, self.numpy_inputs)[0].flatten()
+
    t2 = time.perf_counter()
-    return self.output, t2 - t1
+    return output, t2 - t1


 def fill_driver_state(msg, ds_result: DriverStateResult):
@@ -137,18 +154,23 @@ def main():
  pm = PubMaster(["driverStateV2"])

  calib = np.zeros(CALIB_LEN, dtype=np.float32)
+  model_transform = None

  while True:
    buf = vipc_client.recv()
    if buf is None:
      continue

+    if model_transform is None:
+      cam = _os_fisheye if buf.width == _os_fisheye.width else _ar_ox_fisheye
+      model_transform = np.linalg.inv(np.dot(dmonitoringmodel_intrinsics, np.linalg.inv(cam.intrinsics))).astype(np.float32)
+
    sm.update(0)
    if sm.updated["liveCalibration"]:
      calib[:] = np.array(sm["liveCalibration"].rpyCalib)

    t1 = time.perf_counter()
-    model_output, gpu_execution_time = model.run(buf, calib)
+    model_output, gpu_execution_time = model.run(buf, calib, model_transform)
    t2 = time.perf_counter()

    pm.send("driverStateV2", get_driverstate_packet(model_output, vipc_client.frame_id, vipc_client.timestamp_sof, t2 - t1, gpu_execution_time))
@@ -3,11 +3,22 @@ import capnp
 import numpy as np
 from cereal import log
 from openpilot.selfdrive.modeld.constants import ModelConstants, Plan, Meta
+from openpilot.selfdrive.controls.lib.drive_helpers import MIN_SPEED

 SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')

 ConfidenceClass = log.ModelDataV2.ConfidenceClass

+def curv_from_psis(psi_target, psi_rate, vego, delay):
+  """Return ``2 * psi_target / (vego * delay) - psi_rate / vego``.
+
+  ``vego`` is clamped to at least MIN_SPEED before it is used as a divisor.
+  NOTE(review): ``delay`` is not clamped, so ``delay == 0`` still divides by zero.
+  """
+  vego = np.clip(vego, MIN_SPEED, np.inf)
+  curv_from_psi = psi_target / (vego * delay)  # only vego is bounded away from zero here; no epsilon is added
+  return 2*curv_from_psi - psi_rate / vego
+
+def get_curvature_from_plan(plan, vego, delay):
+  """Compute a curvature value from a model plan via curv_from_psis.
+
+  The target angle is component 2 of the plan's T_FROM_CURRENT_EULER columns,
+  linearly interpolated at time ``delay`` over ModelConstants.T_IDXS; the rate
+  is component 2 of the first ORIENTATION_RATE row.
+  NOTE(review): component 2 is presumably yaw — confirm against the Plan layout.
+  """
+  psi_target = np.interp(delay, ModelConstants.T_IDXS, plan[:, Plan.T_FROM_CURRENT_EULER][:, 2])
+  psi_rate = plan[:, Plan.ORIENTATION_RATE][0, 2]
+  return curv_from_psis(psi_target, psi_rate, vego, delay)
+
 class PublishState:
  def __init__(self):
    self.disengage_buffer = np.zeros(ModelConstants.CONFIDENCE_BUFFER_LEN*ModelConstants.DISENGAGE_WIDTH, dtype=np.float32)
@@ -55,14 +66,17 @@ def fill_lane_line_meta(builder, lane_lines, lane_line_probs):
  builder.rightProb = lane_line_probs[2]

 def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._DynamicStructBuilder,
-                   net_output_data: dict[str, np.ndarray], publish_state: PublishState,
-                   vipc_frame_id: int, vipc_frame_id_extra: int, frame_id: int, frame_drop: float,
-                   timestamp_eof: int, model_execution_time: float, valid: bool) -> None:
+                   net_output_data: dict[str, np.ndarray], v_ego: float, delay: float,
+                   publish_state: PublishState, vipc_frame_id: int, vipc_frame_id_extra: int,
+                   frame_id: int, frame_drop: float, timestamp_eof: int, model_execution_time: float,
+                   valid: bool) -> None:
  frame_age = frame_id - vipc_frame_id if frame_id > vipc_frame_id else 0
  frame_drop_perc = frame_drop * 100
  extended_msg.valid = valid
  base_msg.valid = valid

+  desired_curv = float(get_curvature_from_plan(net_output_data['plan'][0], v_ego, delay))
+
  driving_model_data = base_msg.drivingModelData

  driving_model_data.frameId = vipc_frame_id
@@ -71,7 +85,7 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D
  driving_model_data.modelExecutionTime = model_execution_time

  action = driving_model_data.action
-  action.desiredCurvature = float(net_output_data['desired_curvature'][0,0])
+  action.desiredCurvature = desired_curv

  modelV2 = extended_msg.modelV2
  modelV2.frameId = vipc_frame_id
@@ -106,7 +120,7 @@ def fill_model_msg(base_msg: capnp._DynamicStructBuilder, extended_msg: capnp._D

  # lateral planning
  action = modelV2.action
-  action.desiredCurvature = float(net_output_data['desired_curvature'][0,0])
+  action.desiredCurvature = desired_curv

  # times at X_IDXS according to model plan
  PLAN_T_IDXS = [np.nan] * ModelConstants.IDX_N
@@ -1,10 +1,4 @@
 #!/usr/bin/env bash

 DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
-cd "$DIR/../../"
-
-if [ -f "$DIR/libthneed.so" ]; then
-  export LD_PRELOAD="$DIR/libthneed.so"
-fi
-
 exec "$DIR/modeld.py" "$@"
@@ -1,5 +1,15 @@
 #!/usr/bin/env python3
 import os
+from openpilot.system.hardware import TICI
+
+#
+if TICI:
+  from tinygrad.tensor import Tensor
+  from tinygrad.dtype import dtypes
+  from openpilot.selfdrive.modeld.runners.tinygrad_helpers import qcom_tensor_from_opencl_address
+  os.environ['QCOM'] = '1'
+else:
+  from openpilot.selfdrive.modeld.runners.ort_helpers import make_onnx_cpu_runner
 import time
 import pickle
 import numpy as np
@@ -18,22 +28,19 @@ from openpilot.common.transformations.camera import DEVICE_CAMERAS
 from openpilot.common.transformations.model import get_warp_matrix
 from openpilot.system import sentry
 from openpilot.selfdrive.controls.lib.desire_helper import DesireHelper
-from openpilot.selfdrive.modeld.runners import ModelRunner, Runtime
 from openpilot.selfdrive.modeld.parse_model_outputs import Parser
 from openpilot.selfdrive.modeld.fill_model_msg import fill_model_msg, fill_pose_msg, PublishState
 from openpilot.selfdrive.modeld.constants import ModelConstants
-from openpilot.selfdrive.modeld.models.commonmodel_pyx import ModelFrame, CLContext
+from openpilot.selfdrive.modeld.models.commonmodel_pyx import DrivingModelFrame, CLContext
+

 PROCESS_NAME = "selfdrive.modeld.modeld"
 SEND_RAW_PRED = os.getenv('SEND_RAW_PRED')

-MODEL_PATHS = {
-  ModelRunner.THNEED: Path(__file__).parent / 'models/supercombo.thneed',
-  ModelRunner.ONNX: Path(__file__).parent / 'models/supercombo.onnx'}
-
+MODEL_PATH = Path(__file__).parent / 'models/supercombo.onnx'
+MODEL_PKL_PATH = Path(__file__).parent / 'models/supercombo_tinygrad.pkl'
 METADATA_PATH = Path(__file__).parent / 'models/supercombo_metadata.pkl'

-
 class FrameMeta:
  frame_id: int = 0
  timestamp_sof: int = 0
@@ -44,43 +51,39 @@ class FrameMeta:
      self.frame_id, self.timestamp_sof, self.timestamp_eof = vipc.frame_id, vipc.timestamp_sof, vipc.timestamp_eof

 class ModelState:
-  frame: ModelFrame
-  wide_frame: ModelFrame
+  frames: dict[str, DrivingModelFrame]
  inputs: dict[str, np.ndarray]
  output: np.ndarray
  prev_desire: np.ndarray  # for tracking the rising edge of the pulse
-  model: ModelRunner

  def __init__(self, context: CLContext):
-    self.frame = ModelFrame(context)
-    self.wide_frame = ModelFrame(context)
+    self.frames = {'input_imgs': DrivingModelFrame(context), 'big_input_imgs': DrivingModelFrame(context)}
    self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)
    self.full_features_20Hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN, ModelConstants.FEATURE_LEN), dtype=np.float32)
    self.desire_20Hz =  np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.DESIRE_LEN), dtype=np.float32)
-    self.prev_desired_curv_20hz = np.zeros((ModelConstants.FULL_HISTORY_BUFFER_LEN + 1, ModelConstants.PREV_DESIRED_CURV_LEN), dtype=np.float32)

    # img buffers are managed in openCL transform code
-    self.inputs = {
-      'desire': np.zeros(ModelConstants.DESIRE_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
-      'traffic_convention': np.zeros(ModelConstants.TRAFFIC_CONVENTION_LEN, dtype=np.float32),
-      'lateral_control_params': np.zeros(ModelConstants.LATERAL_CONTROL_PARAMS_LEN, dtype=np.float32),
-      'prev_desired_curv': np.zeros(ModelConstants.PREV_DESIRED_CURV_LEN * (ModelConstants.HISTORY_BUFFER_LEN+1), dtype=np.float32),
-      'features_buffer': np.zeros(ModelConstants.HISTORY_BUFFER_LEN * ModelConstants.FEATURE_LEN, dtype=np.float32),
+    self.numpy_inputs = {
+      'desire': np.zeros((1, (ModelConstants.HISTORY_BUFFER_LEN+1), ModelConstants.DESIRE_LEN), dtype=np.float32),
+      'traffic_convention': np.zeros((1, ModelConstants.TRAFFIC_CONVENTION_LEN), dtype=np.float32),
+      'features_buffer': np.zeros((1, ModelConstants.HISTORY_BUFFER_LEN,  ModelConstants.FEATURE_LEN), dtype=np.float32),
    }

    with open(METADATA_PATH, 'rb') as f:
      model_metadata = pickle.load(f)
+    self.input_shapes =  model_metadata['input_shapes']

    self.output_slices = model_metadata['output_slices']
    net_output_size = model_metadata['output_shapes']['outputs'][1]
    self.output = np.zeros(net_output_size, dtype=np.float32)
    self.parser = Parser()

-    self.model = ModelRunner(MODEL_PATHS, self.output, Runtime.GPU, False, context)
-    self.model.addInput("input_imgs", None)
-    self.model.addInput("big_input_imgs", None)
-    for k,v in self.inputs.items():
-      self.model.addInput(k, v)
+    if TICI:
+      self.tensor_inputs = {k: Tensor(v, device='NPY').realize() for k,v in self.numpy_inputs.items()}
+      with open(MODEL_PKL_PATH, "rb") as f:
+        self.model_run = pickle.load(f)
+    else:
+      self.onnx_cpu_runner = make_onnx_cpu_runner(MODEL_PATH)

  def slice_outputs(self, model_outputs: np.ndarray) -> dict[str, np.ndarray]:
    parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in self.output_slices.items()}
@@ -97,30 +100,36 @@ class ModelState:

    self.desire_20Hz[:-1] = self.desire_20Hz[1:]
    self.desire_20Hz[-1] = new_desire
-    self.inputs['desire'][:] = self.desire_20Hz.reshape((25,4,-1)).max(axis=1).flatten()
+    self.numpy_inputs['desire'][:] = self.desire_20Hz.reshape((1,25,4,-1)).max(axis=2)

-    self.inputs['traffic_convention'][:] = inputs['traffic_convention']
-    self.inputs['lateral_control_params'][:] = inputs['lateral_control_params']
+    self.numpy_inputs['traffic_convention'][:] = inputs['traffic_convention']
+    imgs_cl = {'input_imgs': self.frames['input_imgs'].prepare(buf, transform.flatten()),
+               'big_input_imgs': self.frames['big_input_imgs'].prepare(wbuf, transform_wide.flatten())}

-    self.model.setInputBuffer("input_imgs", self.frame.prepare(buf, transform.flatten(), self.model.getCLBuffer("input_imgs")))
-    self.model.setInputBuffer("big_input_imgs", self.wide_frame.prepare(wbuf, transform_wide.flatten(), self.model.getCLBuffer("big_input_imgs")))
+    if TICI:
+      # The imgs tensors are backed by opencl memory, only need init once
+      for key in imgs_cl:
+        if key not in self.tensor_inputs:
+          self.tensor_inputs[key] = qcom_tensor_from_opencl_address(imgs_cl[key].mem_address, self.input_shapes[key], dtype=dtypes.uint8)
+    else:
+      for key in imgs_cl:
+        self.numpy_inputs[key] = self.frames[key].buffer_from_cl(imgs_cl[key]).reshape(self.input_shapes[key])

    if prepare_only:
      return None

-    self.model.execute()
+    if TICI:
+      self.output = self.model_run(**self.tensor_inputs).numpy().flatten()
+    else:
+      self.output = self.onnx_cpu_runner.run(None, self.numpy_inputs)[0].flatten()
+
    outputs = self.parser.parse_outputs(self.slice_outputs(self.output))

    self.full_features_20Hz[:-1] = self.full_features_20Hz[1:]
    self.full_features_20Hz[-1] = outputs['hidden_state'][0, :]

-    self.prev_desired_curv_20hz[:-1] = self.prev_desired_curv_20hz[1:]
-    self.prev_desired_curv_20hz[-1] = outputs['desired_curvature'][0, :]
-
    idxs = np.arange(-4,-100,-4)[::-1]
-    self.inputs['features_buffer'][:] = self.full_features_20Hz[idxs].flatten()
-    # TODO model only uses last value now, once that changes we need to input strided action history buffer
-    self.inputs['prev_desired_curv'][-ModelConstants.PREV_DESIRED_CURV_LEN:] = 0. * self.prev_desired_curv_20hz[-4, :]
+    self.numpy_inputs['features_buffer'][:] = self.full_features_20Hz[idxs]
    return outputs


@@ -231,7 +240,6 @@ def main(demo=False):
    is_rhd = sm["driverMonitoringState"].isRHD
    frame_id = sm["roadCameraState"].frameId
    v_ego = max(sm["carState"].vEgo, 0.)
-    lateral_control_params = np.array([v_ego, steer_delay], dtype=np.float32)
    if sm.updated["liveCalibration"] and sm.seen['roadCameraState'] and sm.seen['deviceState']:
      device_from_calib_euler = np.array(sm["liveCalibration"].rpyCalib, dtype=np.float32)
      dc = DEVICE_CAMERAS[(str(sm['deviceState'].deviceType), str(sm['roadCameraState'].sensor))]
@@ -262,7 +270,6 @@ def main(demo=False):
    inputs:dict[str, np.ndarray] = {
      'desire': vec_desire,
      'traffic_convention': traffic_convention,
-      'lateral_control_params': lateral_control_params,
      }

    mt1 = time.perf_counter()
@@ -274,7 +281,8 @@ def main(demo=False):
      modelv2_send = messaging.new_message('modelV2')
      drivingdata_send = messaging.new_message('drivingModelData')
      posenet_send = messaging.new_message('cameraOdometry')
-      fill_model_msg(drivingdata_send, modelv2_send, model_output, publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id,
+      fill_model_msg(drivingdata_send, modelv2_send, model_output, v_ego, steer_delay,
+                     publish_state, meta_main.frame_id, meta_extra.frame_id, frame_id,
                     frame_drop_ratio, meta_main.timestamp_eof, model_execution_time, live_calib_seen)

      desire_state = modelv2_send.modelV2.meta.desireState
@@ -291,7 +299,6 @@ def main(demo=False):
      pm.send('modelV2', modelv2_send)
      pm.send('drivingModelData', drivingdata_send)
      pm.send('cameraOdometry', posenet_send)
-
    last_vipc_frame_id = meta_main.frame_id


@@ -1,58 +1,68 @@
 #include "selfdrive/modeld/models/commonmodel.h"

-#include <cassert>
 #include <cmath>
 #include <cstring>

 #include "common/clutil.h"

-ModelFrame::ModelFrame(cl_device_id device_id, cl_context context) {
+// Sets up the host staging buffer, the OpenCL frame buffers and the loadyuv/transform state.
+DrivingModelFrame::DrivingModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
  input_frames = std::make_unique<uint8_t[]>(buf_size);
-
-  q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
-  y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, MODEL_WIDTH * MODEL_HEIGHT, NULL, &err));
-  u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
-  v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (MODEL_WIDTH / 2) * (MODEL_HEIGHT / 2), NULL, &err));
+  input_frames_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
  img_buffer_20hz_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, 5*frame_size_bytes, NULL, &err));
  region.origin = 4 * frame_size_bytes;
  region.size = frame_size_bytes;
  last_img_cl = CL_CHECK_ERR(clCreateSubBuffer(img_buffer_20hz_cl, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &err));

-  transform_init(&transform, context, device_id);
  loadyuv_init(&loadyuv, context, device_id, MODEL_WIDTH, MODEL_HEIGHT);
+  init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
 }

-uint8_t* ModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3 &projection, cl_mem *output) {
-  transform_queue(&this->transform, q,
-                yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
-                y_cl, u_cl, v_cl, MODEL_WIDTH, MODEL_HEIGHT, projection);
+// Transforms the new frame, shifts the 5-slot frame history down by one slot, loads the new
+// frame into the last slot and returns the CL buffer that receives slot 0 followed by the last slot.
+cl_mem* DrivingModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
+  run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);

  for (int i = 0; i < 4; i++) {
    CL_CHECK(clEnqueueCopyBuffer(q, img_buffer_20hz_cl, img_buffer_20hz_cl, (i+1)*frame_size_bytes, i*frame_size_bytes, frame_size_bytes, 0, nullptr, nullptr));
  }
  loadyuv_queue(&loadyuv, q, y_cl, u_cl, v_cl, last_img_cl);
-  if (output == NULL) {
-    CL_CHECK(clEnqueueReadBuffer(q, img_buffer_20hz_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[0], 0, nullptr, nullptr));
-    CL_CHECK(clEnqueueReadBuffer(q, last_img_cl, CL_TRUE, 0, frame_size_bytes, &input_frames[MODEL_FRAME_SIZE], 0, nullptr, nullptr));
-    clFinish(q);
-    return &input_frames[0];
-  } else {
-    copy_queue(&loadyuv, q, img_buffer_20hz_cl, *output, 0, 0, frame_size_bytes);
-    copy_queue(&loadyuv, q, last_img_cl, *output, 0, frame_size_bytes, frame_size_bytes);

-    // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
-    clFinish(q);
-    return NULL;
-  }
+  copy_queue(&loadyuv, q, img_buffer_20hz_cl, input_frames_cl, 0, 0, frame_size_bytes);
+  copy_queue(&loadyuv, q, last_img_cl, input_frames_cl, 0, frame_size_bytes, frame_size_bytes);
+
+  // NOTE: Since thneed is using a different command queue, this clFinish is needed to ensure the image is ready.
+  clFinish(q);
+  return &input_frames_cl;
 }

-ModelFrame::~ModelFrame() {
-  transform_destroy(&transform);
+DrivingModelFrame::~DrivingModelFrame() {
+  deinit_transform();
  loadyuv_destroy(&loadyuv);
  CL_CHECK(clReleaseMemObject(img_buffer_20hz_cl));
  CL_CHECK(clReleaseMemObject(last_img_cl));
+  CL_CHECK(clReleaseMemObject(input_frames_cl));  // created in the constructor; was never released
-  CL_CHECK(clReleaseMemObject(v_cl));
-  CL_CHECK(clReleaseMemObject(u_cl));
-  CL_CHECK(clReleaseMemObject(y_cl));
  CL_CHECK(clReleaseCommandQueue(q));
-}
+}
+
+
+// Sets up the host staging buffer, the input_frame_cl buffer and the transform state.
+MonitoringModelFrame::MonitoringModelFrame(cl_device_id device_id, cl_context context) : ModelFrame(device_id, context) {
+  input_frames = std::make_unique<uint8_t[]>(buf_size);
+  input_frame_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, buf_size, NULL, &err));
+
+  init_transform(device_id, context, MODEL_WIDTH, MODEL_HEIGHT);
+}
+
+// Runs the transform on the incoming frame, waits for it to finish and returns the y_cl buffer it wrote.
+cl_mem* MonitoringModelFrame::prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
+  run_transform(yuv_cl, MODEL_WIDTH, MODEL_HEIGHT, frame_width, frame_height, frame_stride, frame_uv_offset, projection);
+  clFinish(q);
+  return &y_cl;
+}
+
+MonitoringModelFrame::~MonitoringModelFrame() {
+  deinit_transform();
+  CL_CHECK(clReleaseMemObject(input_frame_cl));  // created in the constructor; was never released
+  CL_CHECK(clReleaseCommandQueue(q));
+}
@@ -2,6 +2,7 @@

 #include <cfloat>
 #include <cstdlib>
+#include <cassert>

 #include <memory>

@@ -18,9 +19,54 @@

 class ModelFrame {
 public:
-  ModelFrame(cl_device_id device_id, cl_context context);
-  ~ModelFrame();
-  uint8_t* prepare(cl_mem yuv_cl, int width, int height, int frame_stride, int frame_uv_offset, const mat3& transform, cl_mem *output);
+  ModelFrame(cl_device_id device_id, cl_context context) {
+    q = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, 0, &err));
+  }
+  virtual ~ModelFrame() {}
+  virtual cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) { return NULL; }  // base version does no work; subclasses provide the real prepare()
+  uint8_t* buffer_from_cl(cl_mem *in_frames, int buffer_size) {
+    CL_CHECK(clEnqueueReadBuffer(q, *in_frames, CL_TRUE, 0, buffer_size, input_frames.get(), 0, nullptr, nullptr));  // blocking read (CL_TRUE) of buffer_size bytes into input_frames
+    clFinish(q);
+    return &input_frames[0];
+  }
+
+  int MODEL_WIDTH;  // NOTE(review): these four ints are never initialised in this class; the subclasses declare same-named const members that hide them
+  int MODEL_HEIGHT;
+  int MODEL_FRAME_SIZE;
+  int buf_size;
+
+protected:
+  cl_mem y_cl, u_cl, v_cl;  // created in init_transform(), released in deinit_transform()
+  Transform transform;
+  cl_command_queue q;  // created in the constructor; the subclass destructors release it
+  std::unique_ptr<uint8_t[]> input_frames;  // allocated by the subclass constructors; destination of buffer_from_cl()
+
+  void init_transform(cl_device_id device_id, cl_context context, int model_width, int model_height) {
+    y_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, model_width * model_height, NULL, &err));
+    u_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
+    v_cl = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_WRITE, (model_width / 2) * (model_height / 2), NULL, &err));
+    transform_init(&transform, context, device_id);
+  }
+
+  void deinit_transform() {
+    transform_destroy(&transform);
+    CL_CHECK(clReleaseMemObject(v_cl));
+    CL_CHECK(clReleaseMemObject(u_cl));
+    CL_CHECK(clReleaseMemObject(y_cl));
+  }
+
+  void run_transform(cl_mem yuv_cl, int model_width, int model_height, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) {
+    transform_queue(&transform, q,  // enqueued on q; callers wait with clFinish(q) before using y_cl/u_cl/v_cl
+        yuv_cl, frame_width, frame_height, frame_stride, frame_uv_offset,
+        y_cl, u_cl, v_cl, model_width, model_height, projection);
+  }
+};
+
+class DrivingModelFrame : public ModelFrame {
+public:
+  DrivingModelFrame(cl_device_id device_id, cl_context context);
+  ~DrivingModelFrame();
+  cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) override;

  const int MODEL_WIDTH = 512;
  const int MODEL_HEIGHT = 256;
@@ -29,10 +75,22 @@ public:
  const size_t frame_size_bytes = MODEL_FRAME_SIZE * sizeof(uint8_t);

 private:
-  Transform transform;
  LoadYUVState loadyuv;
-  cl_command_queue q;
-  cl_mem y_cl, u_cl, v_cl, img_buffer_20hz_cl, last_img_cl;
+  cl_mem img_buffer_20hz_cl, last_img_cl, input_frames_cl;  // last_img_cl is a sub-buffer of img_buffer_20hz_cl; prepare() returns &input_frames_cl
  cl_buffer_region region;
-  std::unique_ptr<uint8_t[]> input_frames;
-};
+};
+
+class MonitoringModelFrame : public ModelFrame {
+public:
+  MonitoringModelFrame(cl_device_id device_id, cl_context context);
+  ~MonitoringModelFrame();
+  cl_mem* prepare(cl_mem yuv_cl, int frame_width, int frame_height, int frame_stride, int frame_uv_offset, const mat3& projection) override;
+
+  const int MODEL_WIDTH = 1440;
+  const int MODEL_HEIGHT = 960;
+  const int MODEL_FRAME_SIZE = MODEL_WIDTH * MODEL_HEIGHT;
+  const int buf_size = MODEL_FRAME_SIZE;
+
+private:
+  cl_mem input_frame_cl;  // created in the constructor; prepare() returns y_cl, not this buffer
+};
@@ -14,5 +14,13 @@ cdef extern from "common/clutil.h":
 cdef extern from "selfdrive/modeld/models/commonmodel.h":
  cppclass ModelFrame:
    int buf_size
-    ModelFrame(cl_device_id, cl_context)
-    unsigned char * prepare(cl_mem, int, int, int, int, mat3, cl_mem*)
+    unsigned char * buffer_from_cl(cl_mem*, int)
+    cl_mem * prepare(cl_mem, int, int, int, int, mat3)
+
+  cppclass DrivingModelFrame:
+    int buf_size
+    DrivingModelFrame(cl_device_id, cl_context)
+
+  cppclass MonitoringModelFrame:
+    int buf_size
+    MonitoringModelFrame(cl_device_id, cl_context)
@@ -4,11 +4,12 @@
 import numpy as np
 cimport numpy as cnp
 from libc.string cimport memcpy
+from libc.stdint cimport uintptr_t

 from msgq.visionipc.visionipc cimport cl_mem
 from msgq.visionipc.visionipc_pyx cimport VisionBuf, CLContext as BaseCLContext
 from .commonmodel cimport CL_DEVICE_TYPE_DEFAULT, cl_get_device_id, cl_create_context
-from .commonmodel cimport mat3, ModelFrame as cppModelFrame
+from .commonmodel cimport mat3, ModelFrame as cppModelFrame, DrivingModelFrame as cppDrivingModelFrame, MonitoringModelFrame as cppMonitoringModelFrame


 cdef class CLContext(BaseCLContext):
@@ -23,23 +24,47 @@ cdef class CLMem:
    mem.mem = <cl_mem*> cmem
    return mem

+  @property
+  def mem_address(self):
+    return <uintptr_t>(self.mem)  # the stored cl_mem* pointer, exposed to Python as an integer
+
+def cl_from_visionbuf(VisionBuf buf):
+  return CLMem.create(<void*>&buf.buf.buf_cl)  # wraps the address of buf's own buf_cl field; NOTE(review): presumably only valid while buf is alive — confirm
+
+
 cdef class ModelFrame:
  cdef cppModelFrame * frame
-
-  def __cinit__(self, CLContext context):
-    self.frame = new cppModelFrame(context.device_id, context.context)
+  cdef int buf_size  # filled in by the subclasses' __cinit__

  def __dealloc__(self):
    del self.frame

-  def prepare(self, VisionBuf buf, float[:] projection, CLMem output):
+  def prepare(self, VisionBuf buf, float[:] projection):  # projection must hold at least 9 floats (copied by the memcpy below)
    cdef mat3 cprojection
    memcpy(cprojection.v, &projection[0], 9*sizeof(float))
-    cdef unsigned char * data
-    if output is None:
-      data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, NULL)
-    else:
-      data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection, output.mem)
-    if not data:
-      return None
-    return np.asarray(<cnp.uint8_t[:self.frame.buf_size]> data)
+    cdef cl_mem * data
+    data = self.frame.prepare(buf.buf.buf_cl, buf.width, buf.height, buf.stride, buf.uv_offset, cprojection)
+    return CLMem.create(data)  # wraps the cl_mem* returned by the C++ prepare()
+
+  def buffer_from_cl(self, CLMem in_frames):
+    cdef unsigned char * data2
+    data2 = self.frame.buffer_from_cl(in_frames.mem, self.buf_size)
+    return np.asarray(<cnp.uint8_t[:self.buf_size]> data2)  # uint8 array of buf_size elements over the C++-returned pointer; presumably a view, not a copy — confirm
+
+
+cdef class DrivingModelFrame(ModelFrame):
+  cdef cppDrivingModelFrame * _frame
+
+  def __cinit__(self, CLContext context):
+    self._frame = new cppDrivingModelFrame(context.device_id, context.context)
+    self.frame = <cppModelFrame*>(self._frame)  # base-class pointer used by ModelFrame.prepare/buffer_from_cl/__dealloc__
+    self.buf_size = self._frame.buf_size  # read through the derived-class pointer; ModelFrame.buffer_from_cl uses it as the read size
+
+cdef class MonitoringModelFrame(ModelFrame):
+  cdef cppMonitoringModelFrame * _frame
+
+  def __cinit__(self, CLContext context):
+    self._frame = new cppMonitoringModelFrame(context.device_id, context.context)
+    self.frame = <cppModelFrame*>(self._frame)  # base-class pointer used by ModelFrame.prepare/buffer_from_cl/__dealloc__
+    self.buf_size = self._frame.buf_size  # read through the derived-class pointer; ModelFrame.buffer_from_cl uses it as the read size
+
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9dc64f5d1e7d6b67f1d4659a3483f03b4324b4c7b969a5ba90c4e37e62bf6fce
-size 50320584
+oid sha256:72d3d6f8d3c98f5431ec86be77b6350d7d4f43c25075c0106f1d1e7ec7c77668
+size 49096168
@@ -96,8 +96,6 @@ class Parser:
                   out_shape=(ModelConstants.LEAD_TRAJ_LEN,ModelConstants.LEAD_WIDTH))
    if 'lat_planner_solution' in outs:
      self.parse_mdn('lat_planner_solution', outs, in_N=0, out_N=0, out_shape=(ModelConstants.IDX_N,ModelConstants.LAT_PLANNER_SOLUTION_WIDTH))
-    if 'desired_curvature' in outs:
-      self.parse_mdn('desired_curvature', outs, in_N=0, out_N=0, out_shape=(ModelConstants.DESIRED_CURV_WIDTH,))
    for k in ['lead_prob', 'lane_lines_prob', 'meta']:
      self.parse_binary_crossentropy(k, outs)
    self.parse_categorical_crossentropy('desire_state', outs, out_shape=(ModelConstants.DESIRE_PRED_WIDTH,))
@@ -1,27 +0,0 @@
-import os
-from openpilot.system.hardware import TICI
-from openpilot.selfdrive.modeld.runners.runmodel_pyx import RunModel, Runtime
-assert Runtime
-
-USE_THNEED = int(os.getenv('USE_THNEED', str(int(TICI))))
-USE_SNPE = int(os.getenv('USE_SNPE', str(int(TICI))))
-
-class ModelRunner(RunModel):
-  THNEED = 'THNEED'
-  SNPE = 'SNPE'
-  ONNX = 'ONNX'
-
-  def __new__(cls, paths, *args, **kwargs):
-    if ModelRunner.THNEED in paths and USE_THNEED:
-      from openpilot.selfdrive.modeld.runners.thneedmodel_pyx import ThneedModel as Runner
-      runner_type = ModelRunner.THNEED
-    elif ModelRunner.SNPE in paths and USE_SNPE:
-      from openpilot.selfdrive.modeld.runners.snpemodel_pyx import SNPEModel as Runner
-      runner_type = ModelRunner.SNPE
-    elif ModelRunner.ONNX in paths:
-      from openpilot.selfdrive.modeld.runners.onnxmodel import ONNXModel as Runner
-      runner_type = ModelRunner.ONNX
-    else:
-      raise Exception("Couldn't select a model runner, make sure to pass at least one valid model path")
-
-    return Runner(str(paths[runner_type]), *args, **kwargs)
@@ -1,98 +0,0 @@
-import onnx
-import itertools
-import os
-import sys
-import numpy as np
-from typing import Any
-
-from openpilot.selfdrive.modeld.runners.runmodel_pyx import RunModel
-
-ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}
-
-def attributeproto_fp16_to_fp32(attr):
-  float32_list = np.frombuffer(attr.raw_data, dtype=np.float16)
-  attr.data_type = 1
-  attr.raw_data = float32_list.astype(np.float32).tobytes()
-
-def convert_fp16_to_fp32(onnx_path_or_bytes):
-  if isinstance(onnx_path_or_bytes, bytes):
-    model = onnx.load_from_string(onnx_path_or_bytes)
-  elif isinstance(onnx_path_or_bytes, str):
-    model = onnx.load(onnx_path_or_bytes)
-
-  for i in model.graph.initializer:
-    if i.data_type == 10:
-      attributeproto_fp16_to_fp32(i)
-  for i in itertools.chain(model.graph.input, model.graph.output):
-    if i.type.tensor_type.elem_type == 10:
-      i.type.tensor_type.elem_type = 1
-  for i in model.graph.node:
-    if i.op_type == 'Cast' and i.attribute[0].i == 10:
-      i.attribute[0].i = 1
-    for a in i.attribute:
-      if hasattr(a, 't'):
-        if a.t.data_type == 10:
-          attributeproto_fp16_to_fp32(a.t)
-  return model.SerializeToString()
-
-def create_ort_session(path, fp16_to_fp32):
-  os.environ["OMP_NUM_THREADS"] = "4"
-  os.environ["OMP_WAIT_POLICY"] = "PASSIVE"
-
-  import onnxruntime as ort
-  print("Onnx available providers: ", ort.get_available_providers(), file=sys.stderr)
-  options = ort.SessionOptions()
-  options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
-
-  provider: str | tuple[str, dict[Any, Any]]
-  if 'OpenVINOExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
-    provider = 'OpenVINOExecutionProvider'
-  elif 'CUDAExecutionProvider' in ort.get_available_providers() and 'ONNXCPU' not in os.environ:
-    options.intra_op_num_threads = 2
-    provider = ('CUDAExecutionProvider', {'cudnn_conv_algo_search': 'DEFAULT'})
-  else:
-    options.intra_op_num_threads = 2
-    options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
-    options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
-    provider = 'CPUExecutionProvider'
-
-  model_data = convert_fp16_to_fp32(path) if fp16_to_fp32 else path
-  print("Onnx selected provider: ", [provider], file=sys.stderr)
-  ort_session = ort.InferenceSession(model_data, options, providers=[provider])
-  print("Onnx using ", ort_session.get_providers(), file=sys.stderr)
-  return ort_session
-
-
-class ONNXModel(RunModel):
-  def __init__(self, path, output, runtime, use_tf8, cl_context):
-    self.inputs = {}
-    self.output = output
-
-    self.session = create_ort_session(path, fp16_to_fp32=True)
-    self.input_names = [x.name for x in self.session.get_inputs()]
-    self.input_shapes = {x.name: [1, *x.shape[1:]] for x in self.session.get_inputs()}
-    self.input_dtypes = {x.name: ORT_TYPES_TO_NP_TYPES[x.type] for x in self.session.get_inputs()}
-
-    # run once to initialize CUDA provider
-    if "CUDAExecutionProvider" in self.session.get_providers():
-      self.session.run(None, {k: np.zeros(self.input_shapes[k], dtype=self.input_dtypes[k]) for k in self.input_names})
-    print("ready to run onnx model", self.input_shapes, file=sys.stderr)
-
-  def addInput(self, name, buffer):
-    assert name in self.input_names
-    self.inputs[name] = buffer
-
-  def setInputBuffer(self, name, buffer):
-    assert name in self.inputs
-    self.inputs[name] = buffer
-
-  def getCLBuffer(self, name):
-    return None
-
-  def execute(self):
-    inputs = {k: v.view(self.input_dtypes[k]) for k,v in self.inputs.items()}
-    inputs = {k: v.reshape(self.input_shapes[k]).astype(self.input_dtypes[k]) for k,v in inputs.items()}
-    outputs = self.session.run(None, inputs)
-    assert len(outputs) == 1, "Only single model outputs are supported"
-    self.output[:] = outputs[0]
-    return self.output
@@ -0,0 +1,44 @@
+import onnx
+import onnxruntime as ort
+import numpy as np
+import itertools
+
+ORT_TYPES_TO_NP_TYPES = {'tensor(float16)': np.float16, 'tensor(float)': np.float32, 'tensor(uint8)': np.uint8}
+
+# ONNX TensorProto.DataType codes for the two element types converted below
+FLOAT32 = onnx.TensorProto.FLOAT
+FLOAT16 = onnx.TensorProto.FLOAT16
+
+def attributeproto_fp16_to_fp32(attr):
+  """Rewrite a float16 tensor's raw_data as float32 in place and retag its data_type."""
+  # NOTE: only raw_data is converted; a tensor that keeps its values in another field is retagged but not converted
+  float32_list = np.frombuffer(attr.raw_data, dtype=np.float16)
+  attr.data_type = FLOAT32
+  attr.raw_data = float32_list.astype(np.float32).tobytes()
+
+def convert_fp16_to_fp32(model):
+  """Convert every float16 initializer, graph input/output, Cast target and tensor attribute of `model` to float32 in place, then return the serialized model bytes."""
+  for init in model.graph.initializer:
+    if init.data_type == FLOAT16:
+      attributeproto_fp16_to_fp32(init)
+  for value_info in itertools.chain(model.graph.input, model.graph.output):
+    if value_info.type.tensor_type.elem_type == FLOAT16:
+      value_info.type.tensor_type.elem_type = FLOAT32
+  for node in model.graph.node:
+    for attr in node.attribute:
+      # look the Cast target up by name rather than assuming it is attribute[0]
+      if node.op_type == 'Cast' and attr.name == 'to' and attr.i == FLOAT16:
+        attr.i = FLOAT32
+      if attr.HasField('t') and attr.t.data_type == FLOAT16:
+        attributeproto_fp16_to_fp32(attr.t)
+  return model.SerializeToString()
+
+
+def make_onnx_cpu_runner(model_path):
+  """Load the ONNX model at `model_path`, convert it to float32 and return a CPU-only onnxruntime InferenceSession for it."""
+  options = ort.SessionOptions()
+  options.intra_op_num_threads = 4
+  options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
+  options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+  model_data = convert_fp16_to_fp32(onnx.load(model_path))
+  return ort.InferenceSession(model_data, options, providers=['CPUExecutionProvider'])
@@ -1,4 +0,0 @@
-#pragma once
-
-#include "selfdrive/modeld/runners/runmodel.h"
-#include "selfdrive/modeld/runners/snpemodel.h"
@@ -1,49 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-#include <memory>
-#include <cassert>
-
-#include "common/clutil.h"
-#include "common/swaglog.h"
-
-#define USE_CPU_RUNTIME 0
-#define USE_GPU_RUNTIME 1
-#define USE_DSP_RUNTIME 2
-
-struct ModelInput {
-  const std::string name;
-  float *buffer;
-  int size;
-
-  ModelInput(const std::string _name, float *_buffer, int _size) : name(_name), buffer(_buffer), size(_size) {}
-  virtual void setBuffer(float *_buffer, int _size) {
-    assert(size == _size || size == 0);
-    buffer = _buffer;
-    size = _size;
-  }
-};
-
-class RunModel {
-public:
-  std::vector<std::unique_ptr<ModelInput>> inputs;
-
-  virtual ~RunModel() {}
-  virtual void execute() {}
-  virtual void* getCLBuffer(const std::string name) { return nullptr; }
-
-  virtual void addInput(const std::string name, float *buffer, int size) {
-    inputs.push_back(std::unique_ptr<ModelInput>(new ModelInput(name, buffer, size)));
-  }
-  virtual void setInputBuffer(const std::string name, float *buffer, int size) {
-    for (auto &input : inputs) {
-      if (name == input->name) {
-        input->setBuffer(buffer, size);
-        return;
-      }
-    }
-    LOGE("Tried to update input `%s` but no input with this name exists", name.c_str());
-    assert(false);
-  }
-};
@@ -1,14 +0,0 @@
-# distutils: language = c++
-
-from libcpp.string cimport string
-
-cdef extern from "selfdrive/modeld/runners/runmodel.h":
-  cdef int USE_CPU_RUNTIME
-  cdef int USE_GPU_RUNTIME
-  cdef int USE_DSP_RUNTIME
-
-  cdef cppclass RunModel:
-    void addInput(string, float*, int)
-    void setInputBuffer(string, float*, int)
-    void * getCLBuffer(string)
-    void execute()
@@ -1,6 +0,0 @@
-# distutils: language = c++
-
-from .runmodel cimport RunModel as cppRunModel
-
-cdef class RunModel:
-  cdef cppRunModel * model
@@ -1,37 +0,0 @@
-# distutils: language = c++
-# cython: c_string_encoding=ascii, language_level=3
-
-from libcpp.string cimport string
-
-from .runmodel cimport USE_CPU_RUNTIME, USE_GPU_RUNTIME, USE_DSP_RUNTIME
-from selfdrive.modeld.models.commonmodel_pyx cimport CLMem
-
-class Runtime:
-  CPU = USE_CPU_RUNTIME
-  GPU = USE_GPU_RUNTIME
-  DSP = USE_DSP_RUNTIME
-
-cdef class RunModel:
-  def __dealloc__(self):
-    del self.model
-
-  def addInput(self, string name, float[:] buffer):
-    if buffer is not None:
-      self.model.addInput(name, &buffer[0], len(buffer))
-    else:
-      self.model.addInput(name, NULL, 0)
-
-  def setInputBuffer(self, string name, float[:] buffer):
-    if buffer is not None:
-      self.model.setInputBuffer(name, &buffer[0], len(buffer))
-    else:
-      self.model.setInputBuffer(name, NULL, 0)
-
-  def getCLBuffer(self, string name):
-    cdef void * cl_buf = self.model.getCLBuffer(name)
-    if not cl_buf:
-      return None
-    return CLMem.create(cl_buf)
-
-  def execute(self):
-    self.model.execute()
@@ -1,116 +0,0 @@
-#pragma clang diagnostic ignored "-Wexceptions"
-
-#include "selfdrive/modeld/runners/snpemodel.h"
-
-#include <cstring>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "common/util.h"
-#include "common/timing.h"
-
-void PrintErrorStringAndExit() {
-  std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
-  std::exit(EXIT_FAILURE);
-}
-
-SNPEModel::SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool _use_tf8, cl_context context) {
-  output = _output;
-  output_size = _output_size;
-  use_tf8 = _use_tf8;
-
-#ifdef QCOM2
-  if (runtime == USE_GPU_RUNTIME) {
-    snpe_runtime = zdl::DlSystem::Runtime_t::GPU;
-  } else if (runtime == USE_DSP_RUNTIME) {
-    snpe_runtime = zdl::DlSystem::Runtime_t::DSP;
-  } else {
-    snpe_runtime = zdl::DlSystem::Runtime_t::CPU;
-  }
-  assert(zdl::SNPE::SNPEFactory::isRuntimeAvailable(snpe_runtime));
-#endif
-  model_data = util::read_file(path);
-  assert(model_data.size() > 0);
-
-  // load model
-  std::unique_ptr<zdl::DlContainer::IDlContainer> container = zdl::DlContainer::IDlContainer::open((uint8_t*)model_data.data(), model_data.size());
-  if (!container) { PrintErrorStringAndExit(); }
-  LOGW("loaded model with size: %lu", model_data.size());
-
-  // create model runner
-  zdl::SNPE::SNPEBuilder snpe_builder(container.get());
-  while (!snpe) {
-#ifdef QCOM2
-    snpe = snpe_builder.setOutputLayers({})
-                       .setRuntimeProcessor(snpe_runtime)
-                       .setUseUserSuppliedBuffers(true)
-                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
-                       .build();
-#else
-    snpe = snpe_builder.setOutputLayers({})
-                       .setUseUserSuppliedBuffers(true)
-                       .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
-                       .build();
-#endif
-    if (!snpe) std::cerr << zdl::DlSystem::getLastErrorString() << std::endl;
-  }
-
-  // create output buffer
-  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
-  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
-
-  const auto &output_tensor_names_opt = snpe->getOutputTensorNames();
-  if (!output_tensor_names_opt) throw std::runtime_error("Error obtaining output tensor names");
-  const auto &output_tensor_names = *output_tensor_names_opt;
-  assert(output_tensor_names.size() == 1);
-  const char *output_tensor_name = output_tensor_names.at(0);
-  const zdl::DlSystem::TensorShape &buffer_shape = snpe->getInputOutputBufferAttributes(output_tensor_name)->getDims();
-  if (output_size != 0) {
-    assert(output_size == buffer_shape[1]);
-  } else {
-    output_size = buffer_shape[1];
-  }
-  std::vector<size_t> output_strides = {output_size * sizeof(float), sizeof(float)};
-  output_buffer = ub_factory.createUserBuffer(output, output_size * sizeof(float), output_strides, &ub_encoding_float);
-  output_map.add(output_tensor_name, output_buffer.get());
-}
-
-void SNPEModel::addInput(const std::string name, float *buffer, int size) {
-  const int idx = inputs.size();
-  const auto &input_tensor_names_opt = snpe->getInputTensorNames();
-  if (!input_tensor_names_opt) throw std::runtime_error("Error obtaining input tensor names");
-  const auto &input_tensor_names = *input_tensor_names_opt;
-  const char *input_tensor_name = input_tensor_names.at(idx);
-  const bool input_tf8 = use_tf8 && strcmp(input_tensor_name, "input_img") == 0;  // TODO: This is a terrible hack, get rid of this name check both here and in onnx_runner.py
-  LOGW("adding index %d: %s", idx, input_tensor_name);
-
-  zdl::DlSystem::UserBufferEncodingFloat ub_encoding_float;
-  zdl::DlSystem::UserBufferEncodingTf8 ub_encoding_tf8(0, 1./255); // network takes 0-1
-  zdl::DlSystem::IUserBufferFactory &ub_factory = zdl::SNPE::SNPEFactory::getUserBufferFactory();
-  zdl::DlSystem::UserBufferEncoding *input_encoding = input_tf8 ? (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_tf8 : (zdl::DlSystem::UserBufferEncoding*)&ub_encoding_float;
-
-  const auto &buffer_shape_opt = snpe->getInputDimensions(input_tensor_name);
-  const zdl::DlSystem::TensorShape &buffer_shape = *buffer_shape_opt;
-  size_t size_of_input = input_tf8 ? sizeof(uint8_t) : sizeof(float);
-  std::vector<size_t> strides(buffer_shape.rank());
-  strides[strides.size() - 1] = size_of_input;
-  size_t product = 1;
-  for (size_t i = 0; i < buffer_shape.rank(); i++) product *= buffer_shape[i];
-  size_t stride = strides[strides.size() - 1];
-  for (size_t i = buffer_shape.rank() - 1; i > 0; i--) {
-    stride *= buffer_shape[i];
-    strides[i-1] = stride;
-  }
-
-  auto input_buffer = ub_factory.createUserBuffer(buffer, product*size_of_input, strides, input_encoding);
-  input_map.add(input_tensor_name, input_buffer.get());
-  inputs.push_back(std::unique_ptr<SNPEModelInput>(new SNPEModelInput(name, buffer, size, std::move(input_buffer))));
-}
-
-void SNPEModel::execute() {
-  if (!snpe->execute(input_map, output_map)) {
-    PrintErrorStringAndExit();
-  }
-}
@@ -1,52 +0,0 @@
-#pragma once
-#pragma clang diagnostic ignored "-Wdeprecated-declarations"
-
-#include <memory>
-#include <string>
-#include <utility>
-
-#include <DlContainer/IDlContainer.hpp>
-#include <DlSystem/DlError.hpp>
-#include <DlSystem/ITensor.hpp>
-#include <DlSystem/ITensorFactory.hpp>
-#include <DlSystem/IUserBuffer.hpp>
-#include <DlSystem/IUserBufferFactory.hpp>
-#include <SNPE/SNPE.hpp>
-#include <SNPE/SNPEBuilder.hpp>
-#include <SNPE/SNPEFactory.hpp>
-
-#include "selfdrive/modeld/runners/runmodel.h"
-
-struct SNPEModelInput : public ModelInput {
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> snpe_buffer;
-
-  SNPEModelInput(const std::string _name, float *_buffer, int _size, std::unique_ptr<zdl::DlSystem::IUserBuffer> _snpe_buffer) : ModelInput(_name, _buffer, _size), snpe_buffer(std::move(_snpe_buffer)) {}
-  void setBuffer(float *_buffer, int _size) {
-    ModelInput::setBuffer(_buffer, _size);
-    assert(snpe_buffer->setBufferAddress(_buffer) == true);
-  }
-};
-
-class SNPEModel : public RunModel {
-public:
-  SNPEModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
-  void addInput(const std::string name, float *buffer, int size);
-  void execute();
-
-private:
-  std::string model_data;
-
-#ifdef QCOM2
-  zdl::DlSystem::Runtime_t snpe_runtime;
-#endif
-
-  // snpe model stuff
-  std::unique_ptr<zdl::SNPE::SNPE> snpe;
-  zdl::DlSystem::UserBufferMap input_map;
-  zdl::DlSystem::UserBufferMap output_map;
-  std::unique_ptr<zdl::DlSystem::IUserBuffer> output_buffer;
-
-  bool use_tf8;
-  float *output;
-  size_t output_size;
-};
@@ -1,9 +0,0 @@
-# distutils: language = c++
-
-from libcpp.string cimport string
-
-from msgq.visionipc.visionipc cimport cl_context
-
-cdef extern from "selfdrive/modeld/runners/snpemodel.h":
-  cdef cppclass SNPEModel:
-    SNPEModel(string, float*, size_t, int, bool, cl_context)
@@ -1,17 +0,0 @@
-# distutils: language = c++
-# cython: c_string_encoding=ascii, language_level=3
-
-import os
-from libcpp cimport bool
-from libcpp.string cimport string
-
-from .snpemodel cimport SNPEModel as cppSNPEModel
-from selfdrive.modeld.models.commonmodel_pyx cimport CLContext
-from selfdrive.modeld.runners.runmodel_pyx cimport RunModel
-from selfdrive.modeld.runners.runmodel cimport RunModel as cppRunModel
-
-os.environ['ADSP_LIBRARY_PATH'] = "/data/pythonpath/third_party/snpe/dsp/"
-
-cdef class SNPEModel(RunModel):
-  def __cinit__(self, string path, float[:] output, int runtime, bool use_tf8, CLContext context):
-    self.model = <cppRunModel *> new cppSNPEModel(path, &output[0], len(output), runtime, use_tf8, context.context)
@@ -1,58 +0,0 @@
-#include "selfdrive/modeld/runners/thneedmodel.h"
-
-#include <string>
-
-#include "common/swaglog.h"
-
-ThneedModel::ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool luse_tf8, cl_context context) {
-  thneed = new Thneed(true, context);
-  thneed->load(path.c_str());
-  thneed->clexec();
-
-  recorded = false;
-  output = _output;
-}
-
-void* ThneedModel::getCLBuffer(const std::string name) {
-  int index = -1;
-  for (int i = 0; i < inputs.size(); i++) {
-    if (name == inputs[i]->name) {
-      index = i;
-      break;
-    }
-  }
-
-  if (index == -1) {
-    LOGE("Tried to get CL buffer for input `%s` but no input with this name exists", name.c_str());
-    assert(false);
-  }
-
-  if (thneed->input_clmem.size() >= inputs.size()) {
-    return &thneed->input_clmem[inputs.size() - index - 1];
-  } else {
-    return nullptr;
-  }
-}
-
-void ThneedModel::execute() {
-  if (!recorded) {
-    thneed->record = true;
-    float *input_buffers[inputs.size()];
-    for (int i = 0; i < inputs.size(); i++) {
-      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
-    }
-
-    thneed->copy_inputs(input_buffers);
-    thneed->clexec();
-    thneed->copy_output(output);
-    thneed->stop();
-
-    recorded = true;
-  } else {
-    float *input_buffers[inputs.size()];
-    for (int i = 0; i < inputs.size(); i++) {
-      input_buffers[inputs.size() - i - 1] = inputs[i]->buffer;
-    }
-    thneed->execute(input_buffers, output);
-  }
-}
@@ -1,17 +0,0 @@
-#pragma once
-
-#include <string>
-
-#include "selfdrive/modeld/runners/runmodel.h"
-#include "selfdrive/modeld/thneed/thneed.h"
-
-class ThneedModel : public RunModel {
-public:
-  ThneedModel(const std::string path, float *_output, size_t _output_size, int runtime, bool use_tf8 = false, cl_context context = NULL);
-  void *getCLBuffer(const std::string name);
-  void execute();
-private:
-  Thneed *thneed = NULL;
-  bool recorded;
-  float *output;
-};
@@ -1,9 +0,0 @@
-# distutils: language = c++
-
-from libcpp.string cimport string
-
-from msgq.visionipc.visionipc cimport cl_context
-
-cdef extern from "selfdrive/modeld/runners/thneedmodel.h":
-  cdef cppclass ThneedModel:
-    ThneedModel(string, float*, size_t, int, bool, cl_context)
@@ -1,14 +0,0 @@
-# distutils: language = c++
-# cython: c_string_encoding=ascii, language_level=3
-
-from libcpp cimport bool
-from libcpp.string cimport string
-
-from .thneedmodel cimport ThneedModel as cppThneedModel
-from selfdrive.modeld.models.commonmodel_pyx cimport CLContext
-from selfdrive.modeld.runners.runmodel_pyx cimport RunModel
-from selfdrive.modeld.runners.runmodel cimport RunModel as cppRunModel
-
-cdef class ThneedModel(RunModel):
-  def __cinit__(self, string path, float[:] output, int runtime, bool use_tf8, CLContext context):
-    self.model = <cppRunModel *> new cppThneedModel(path, &output[0], len(output), runtime, use_tf8, context.context)
@@ -0,0 +1,8 @@
+
+from tinygrad.tensor import Tensor
+from tinygrad.helpers import to_mv
+
+def qcom_tensor_from_opencl_address(opencl_address, shape, dtype):
+  """Build a tinygrad tensor on the 'QCOM' device from an OpenCL buffer address.
+
+  The lookup follows two levels of indirection: the 64-bit word stored at
+  `opencl_address` is treated as the address of a buffer descriptor, and the
+  64-bit word at byte offset 0xA0 inside that descriptor is used as the raw
+  GPU pointer handed to `Tensor.from_blob`.
+
+  Args:
+    opencl_address: integer address whose first 8 bytes hold the descriptor pointer.
+    shape: shape forwarded unchanged to `Tensor.from_blob`.
+    dtype: dtype forwarded unchanged to `Tensor.from_blob`.
+
+  Returns:
+    The tensor returned by `Tensor.from_blob(..., device='QCOM')`.
+  """
+  # First 8 bytes at the OpenCL address: pointer to the buffer descriptor.
+  desc_addr = to_mv(opencl_address, 8).cast('Q')[0]
+  # View the first 0x100 bytes of the descriptor as unsigned 64-bit words.
+  desc_words = to_mv(desc_addr, 0x100).cast('Q')
+  # Byte offset 0xA0 (word index 20) is a raw gpu pointer.
+  gpu_ptr = desc_words[0xA0 // 8]
+  return Tensor.from_blob(gpu_ptr, shape, dtype=dtype, device='QCOM')
@@ -1,8 +0,0 @@
-thneed is an SNPE accelerator. I know SNPE is already an accelerator, but sometimes things need to go even faster..
-
-It runs on the local device, and caches a single model run. Then it replays it, but fast.
-
-thneed slices through abstraction layers like a fish.
-
-You need a thneed.
-
@@ -1,154 +0,0 @@
-#include <cassert>
-#include <set>
-
-#include "third_party/json11/json11.hpp"
-#include "common/util.h"
-#include "common/clutil.h"
-#include "common/swaglog.h"
-#include "selfdrive/modeld/thneed/thneed.h"
-using namespace json11;
-
-extern map<cl_program, string> g_program_source;
-
-void Thneed::load(const char *filename) {
-  LOGD("Thneed::load: loading from %s\n", filename);
-
-  string buf = util::read_file(filename);
-  int jsz = *(int *)buf.data();
-  string jsonerr;
-  string jj(buf.data() + sizeof(int), jsz);
-  Json jdat = Json::parse(jj, jsonerr);
-
-  map<cl_mem, cl_mem> real_mem;
-  real_mem[NULL] = NULL;
-
-  int ptr = sizeof(int)+jsz;
-  for (auto &obj : jdat["objects"].array_items()) {
-    auto mobj = obj.object_items();
-    int sz = mobj["size"].int_value();
-    cl_mem clbuf = NULL;
-
-    if (mobj["buffer_id"].string_value().size() > 0) {
-      // image buffer must already be allocated
-      clbuf = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
-      assert(mobj["needs_load"].bool_value() == false);
-    } else {
-      if (mobj["needs_load"].bool_value()) {
-        clbuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sz, &buf[ptr], NULL);
-        if (debug >= 1) printf("loading %p %d @ 0x%X\n", clbuf, sz, ptr);
-        ptr += sz;
-      } else {
-        // TODO: is there a faster way to init zeroed out buffers?
-        void *host_zeros = calloc(sz, 1);
-        clbuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, sz, host_zeros, NULL);
-        free(host_zeros);
-      }
-    }
-    assert(clbuf != NULL);
-
-    if (mobj["arg_type"] == "image2d_t" || mobj["arg_type"] == "image1d_t") {
-      cl_image_desc desc = {0};
-      desc.image_type = (mobj["arg_type"] == "image2d_t") ? CL_MEM_OBJECT_IMAGE2D : CL_MEM_OBJECT_IMAGE1D_BUFFER;
-      desc.image_width = mobj["width"].int_value();
-      desc.image_height = mobj["height"].int_value();
-      desc.image_row_pitch = mobj["row_pitch"].int_value();
-      assert(sz == desc.image_height*desc.image_row_pitch);
-#ifdef QCOM2
-      desc.buffer = clbuf;
-#else
-      // TODO: we are creating unused buffers on PC
-      clReleaseMemObject(clbuf);
-#endif
-      cl_image_format format = {0};
-      format.image_channel_order = CL_RGBA;
-      format.image_channel_data_type = mobj["float32"].bool_value() ? CL_FLOAT : CL_HALF_FLOAT;
-
-      cl_int errcode;
-
-#ifndef QCOM2
-      if (mobj["needs_load"].bool_value()) {
-        clbuf = clCreateImage(context, CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, &format, &desc, &buf[ptr-sz], &errcode);
-      } else {
-        clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &errcode);
-      }
-#else
-      clbuf = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &errcode);
-#endif
-      if (clbuf == NULL) {
-        LOGE("clError: %s create image %zux%zu rp %zu with buffer %p\n", cl_get_error_string(errcode),
-             desc.image_width, desc.image_height, desc.image_row_pitch, desc.buffer);
-      }
-      assert(clbuf != NULL);
-    }
-
-    real_mem[*(cl_mem*)(mobj["id"].string_value().data())] = clbuf;
-  }
-
-  map<string, cl_program> g_programs;
-  for (const auto &[name, source] : jdat["programs"].object_items()) {
-    if (debug >= 1) printf("building %s with size %zu\n", name.c_str(), source.string_value().size());
-    g_programs[name] = cl_program_from_source(context, device_id, source.string_value());
-  }
-
-  for (auto &obj : jdat["inputs"].array_items()) {
-    auto mobj = obj.object_items();
-    int sz = mobj["size"].int_value();
-    cl_mem aa = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
-    input_clmem.push_back(aa);
-    input_sizes.push_back(sz);
-    LOGD("Thneed::load: adding input %s with size %d\n", mobj["name"].string_value().data(), sz);
-
-    cl_int cl_err;
-    void *ret = clEnqueueMapBuffer(command_queue, aa, CL_TRUE, CL_MAP_WRITE, 0, sz, 0, NULL, NULL, &cl_err);
-    if (cl_err != CL_SUCCESS) LOGE("clError: %s map %p %d\n", cl_get_error_string(cl_err), aa, sz);
-    assert(cl_err == CL_SUCCESS);
-    inputs.push_back(ret);
-  }
-
-  for (auto &obj : jdat["outputs"].array_items()) {
-    auto mobj = obj.object_items();
-    int sz = mobj["size"].int_value();
-    LOGD("Thneed::save: adding output with size %d\n", sz);
-    // TODO: support multiple outputs
-    output = real_mem[*(cl_mem*)(mobj["buffer_id"].string_value().data())];
-    assert(output != NULL);
-  }
-
-  for (auto &obj : jdat["binaries"].array_items()) {
-    string name = obj["name"].string_value();
-    size_t length = obj["length"].int_value();
-    if (debug >= 1) printf("binary %s with size %zu\n", name.c_str(), length);
-    g_programs[name] = cl_program_from_binary(context, device_id, (const uint8_t*)&buf[ptr], length);
-    ptr += length;
-  }
-
-  for (auto &obj : jdat["kernels"].array_items()) {
-    auto gws = obj["global_work_size"];
-    auto lws = obj["local_work_size"];
-    auto kk = shared_ptr<CLQueuedKernel>(new CLQueuedKernel(this));
-
-    kk->name = obj["name"].string_value();
-    kk->program = g_programs[kk->name];
-    kk->work_dim = obj["work_dim"].int_value();
-    for (int i = 0; i < kk->work_dim; i++) {
-      kk->global_work_size[i] = gws[i].int_value();
-      kk->local_work_size[i] = lws[i].int_value();
-    }
-    kk->num_args = obj["num_args"].int_value();
-    for (int i = 0; i < kk->num_args; i++) {
-      string arg = obj["args"].array_items()[i].string_value();
-      int arg_size = obj["args_size"].array_items()[i].int_value();
-      kk->args_size.push_back(arg_size);
-      if (arg_size == 8) {
-        cl_mem val = *(cl_mem*)(arg.data());
-        val = real_mem[val];
-        kk->args.push_back(string((char*)&val, sizeof(val)));
-      } else {
-        kk->args.push_back(arg);
-      }
-    }
-    kq.push_back(kk);
-  }
-
-  clFinish(command_queue);
-}
@@ -1,133 +0,0 @@
-#pragma once
-
-#ifndef __user
-#define __user __attribute__(())
-#endif
-
-#include <cstdint>
-#include <cstdlib>
-#include <memory>
-#include <string>
-#include <vector>
-
-#include <CL/cl.h>
-
-#include "third_party/linux/include/msm_kgsl.h"
-
-using namespace std;
-
-cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value);
-
-namespace json11 {
-  class Json;
-}
-class Thneed;
-
-class GPUMalloc {
-  public:
-    GPUMalloc(int size, int fd);
-    ~GPUMalloc();
-    void *alloc(int size);
-  private:
-    uint64_t base;
-    int remaining;
-};
-
-class CLQueuedKernel {
-  public:
-    CLQueuedKernel(Thneed *lthneed) { thneed = lthneed; }
-    CLQueuedKernel(Thneed *lthneed,
-                   cl_kernel _kernel,
-                   cl_uint _work_dim,
-                   const size_t *_global_work_size,
-                   const size_t *_local_work_size);
-    cl_int exec();
-    void debug_print(bool verbose);
-    int get_arg_num(const char *search_arg_name);
-    cl_program program;
-    string name;
-    cl_uint num_args;
-    vector<string> arg_names;
-    vector<string> arg_types;
-    vector<string> args;
-    vector<int> args_size;
-    cl_kernel kernel = NULL;
-    json11::Json to_json() const;
-
-    cl_uint work_dim;
-    size_t global_work_size[3] = {0};
-    size_t local_work_size[3] = {0};
-  private:
-    Thneed *thneed;
-};
-
-class CachedIoctl {
-  public:
-    virtual void exec() {}
-};
-
-class CachedSync: public CachedIoctl {
-  public:
-    CachedSync(Thneed *lthneed, string ldata) { thneed = lthneed; data = ldata; }
-    void exec();
-  private:
-    Thneed *thneed;
-    string data;
-};
-
-class CachedCommand: public CachedIoctl {
-  public:
-    CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd);
-    void exec();
-  private:
-    void disassemble(int cmd_index);
-    struct kgsl_gpu_command cache;
-    unique_ptr<kgsl_command_object[]> cmds;
-    unique_ptr<kgsl_command_object[]> objs;
-    Thneed *thneed;
-    vector<shared_ptr<CLQueuedKernel> > kq;
-};
-
-class Thneed {
-  public:
-    Thneed(bool do_clinit=false, cl_context _context = NULL);
-    void stop();
-    void execute(float **finputs, float *foutput, bool slow=false);
-    void wait();
-
-    vector<cl_mem> input_clmem;
-    vector<void *> inputs;
-    vector<size_t> input_sizes;
-    cl_mem output = NULL;
-
-    cl_context context = NULL;
-    cl_command_queue command_queue;
-    cl_device_id device_id;
-    int context_id;
-
-    // protected?
-    bool record = false;
-    int debug;
-    int timestamp;
-
-#ifdef QCOM2
-    unique_ptr<GPUMalloc> ram;
-    vector<unique_ptr<CachedIoctl> > cmds;
-    int fd;
-#endif
-
-    // all CL kernels
-    void copy_inputs(float **finputs, bool internal=false);
-    void copy_output(float *foutput);
-    cl_int clexec();
-    vector<shared_ptr<CLQueuedKernel> > kq;
-
-    // pending CL kernels
-    vector<shared_ptr<CLQueuedKernel> > ckq;
-
-    // loading
-    void load(const char *filename);
-  private:
-    void clinit();
-};
-
@@ -1,216 +0,0 @@
-#include "selfdrive/modeld/thneed/thneed.h"
-
-#include <cassert>
-#include <cstring>
-#include <map>
-
-#include "common/clutil.h"
-#include "common/timing.h"
-
-map<pair<cl_kernel, int>, string> g_args;
-map<pair<cl_kernel, int>, int> g_args_size;
-map<cl_program, string> g_program_source;
-
-void Thneed::stop() {
-  //printf("Thneed::stop: recorded %lu commands\n", cmds.size());
-  record = false;
-}
-
-void Thneed::clinit() {
-  device_id = cl_get_device_id(CL_DEVICE_TYPE_DEFAULT);
-  if (context == NULL) context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, NULL, NULL, &err));
-  //cl_command_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
-  cl_command_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
-  command_queue = CL_CHECK_ERR(clCreateCommandQueueWithProperties(context, device_id, props, &err));
-  printf("Thneed::clinit done\n");
-}
-
-cl_int Thneed::clexec() {
-  if (debug >= 1) printf("Thneed::clexec: running %lu queued kernels\n", kq.size());
-  for (auto &k : kq) {
-    if (record) ckq.push_back(k);
-    cl_int ret = k->exec();
-    assert(ret == CL_SUCCESS);
-  }
-  return clFinish(command_queue);
-}
-
-void Thneed::copy_inputs(float **finputs, bool internal) {
-  for (int idx = 0; idx < inputs.size(); ++idx) {
-    if (debug >= 1) printf("copying %lu -- %p -> %p (cl %p)\n", input_sizes[idx], finputs[idx], inputs[idx], input_clmem[idx]);
-
-    if (internal) {
-      // if it's internal, using memcpy is fine since the buffer sync is cached in the ioctl layer
-      if (finputs[idx] != NULL) memcpy(inputs[idx], finputs[idx], input_sizes[idx]);
-    } else {
-      if (finputs[idx] != NULL) CL_CHECK(clEnqueueWriteBuffer(command_queue, input_clmem[idx], CL_TRUE, 0, input_sizes[idx], finputs[idx], 0, NULL, NULL));
-    }
-  }
-}
-
-void Thneed::copy_output(float *foutput) {
-  if (output != NULL) {
-    size_t sz;
-    clGetMemObjectInfo(output, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
-    if (debug >= 1) printf("copying %lu for output %p -> %p\n", sz, output, foutput);
-    CL_CHECK(clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sz, foutput, 0, NULL, NULL));
-  } else {
-    printf("CAUTION: model output is NULL, does it have no outputs?\n");
-  }
-}
-
-// *********** CLQueuedKernel ***********
-
-CLQueuedKernel::CLQueuedKernel(Thneed *lthneed,
-                               cl_kernel _kernel,
-                               cl_uint _work_dim,
-                               const size_t *_global_work_size,
-                               const size_t *_local_work_size) {
-  thneed = lthneed;
-  kernel = _kernel;
-  work_dim = _work_dim;
-  assert(work_dim <= 3);
-  for (int i = 0; i < work_dim; i++) {
-    global_work_size[i] = _global_work_size[i];
-    local_work_size[i] = _local_work_size[i];
-  }
-
-  char _name[0x100];
-  clGetKernelInfo(kernel, CL_KERNEL_FUNCTION_NAME, sizeof(_name), _name, NULL);
-  name = string(_name);
-  clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(num_args), &num_args, NULL);
-
-  // get args
-  for (int i = 0; i < num_args; i++) {
-    char arg_name[0x100] = {0};
-    clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
-    arg_names.push_back(string(arg_name));
-    clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_name), arg_name, NULL);
-    arg_types.push_back(string(arg_name));
-
-    args.push_back(g_args[make_pair(kernel, i)]);
-    args_size.push_back(g_args_size[make_pair(kernel, i)]);
-  }
-
-  // get program
-  clGetKernelInfo(kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, NULL);
-}
-
-int CLQueuedKernel::get_arg_num(const char *search_arg_name) {
-  for (int i = 0; i < num_args; i++) {
-    if (arg_names[i] == search_arg_name) return i;
-  }
-  printf("failed to find %s in %s\n", search_arg_name, name.c_str());
-  assert(false);
-}
-
-cl_int CLQueuedKernel::exec() {
-  if (kernel == NULL) {
-    kernel = clCreateKernel(program, name.c_str(), NULL);
-    arg_names.clear();
-    arg_types.clear();
-
-    for (int j = 0; j < num_args; j++) {
-      char arg_name[0x100] = {0};
-      clGetKernelArgInfo(kernel, j, CL_KERNEL_ARG_NAME, sizeof(arg_name), arg_name, NULL);
-      arg_names.push_back(string(arg_name));
-      clGetKernelArgInfo(kernel, j, CL_KERNEL_ARG_TYPE_NAME, sizeof(arg_name), arg_name, NULL);
-      arg_types.push_back(string(arg_name));
-
-      cl_int ret;
-      if (args[j].size() != 0) {
-        assert(args[j].size() == args_size[j]);
-        ret = thneed_clSetKernelArg(kernel, j, args[j].size(), args[j].data());
-      } else {
-        ret = thneed_clSetKernelArg(kernel, j, args_size[j], NULL);
-      }
-      assert(ret == CL_SUCCESS);
-    }
-  }
-
-  if (thneed->debug >= 1) {
-    debug_print(thneed->debug >= 2);
-  }
-
-  return clEnqueueNDRangeKernel(thneed->command_queue,
-    kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, NULL);
-}
-
-void CLQueuedKernel::debug_print(bool verbose) {
-  printf("%p %56s -- ", kernel, name.c_str());
-  for (int i = 0; i < work_dim; i++) {
-    printf("%4zu ", global_work_size[i]);
-  }
-  printf(" -- ");
-  for (int i = 0; i < work_dim; i++) {
-    printf("%4zu ", local_work_size[i]);
-  }
-  printf("\n");
-
-  if (verbose) {
-    for (int i = 0; i < num_args; i++) {
-      string arg = args[i];
-      printf("  %s %s", arg_types[i].c_str(), arg_names[i].c_str());
-      void *arg_value = (void*)arg.data();
-      int arg_size = arg.size();
-      if (arg_size == 0) {
-        printf(" (size) %d", args_size[i]);
-      } else if (arg_size == 1) {
-        printf(" = %d", *((char*)arg_value));
-      } else if (arg_size == 2) {
-        printf(" = %d", *((short*)arg_value));
-      } else if (arg_size == 4) {
-        if (arg_types[i] == "float") {
-          printf(" = %f", *((float*)arg_value));
-        } else {
-          printf(" = %d", *((int*)arg_value));
-        }
-      } else if (arg_size == 8) {
-        cl_mem val = (cl_mem)(*((uintptr_t*)arg_value));
-        printf(" = %p", val);
-        if (val != NULL) {
-          cl_mem_object_type obj_type;
-          clGetMemObjectInfo(val, CL_MEM_TYPE, sizeof(obj_type), &obj_type, NULL);
-          if (arg_types[i] == "image2d_t" || arg_types[i] == "image1d_t" || obj_type == CL_MEM_OBJECT_IMAGE2D) {
-            cl_image_format format;
-            size_t width, height, depth, array_size, row_pitch, slice_pitch;
-            cl_mem buf;
-            clGetImageInfo(val, CL_IMAGE_FORMAT, sizeof(format), &format, NULL);
-            assert(format.image_channel_order == CL_RGBA);
-            assert(format.image_channel_data_type == CL_HALF_FLOAT || format.image_channel_data_type == CL_FLOAT);
-            clGetImageInfo(val, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
-            clGetImageInfo(val, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
-            clGetImageInfo(val, CL_IMAGE_ROW_PITCH, sizeof(row_pitch), &row_pitch, NULL);
-            clGetImageInfo(val, CL_IMAGE_DEPTH, sizeof(depth), &depth, NULL);
-            clGetImageInfo(val, CL_IMAGE_ARRAY_SIZE, sizeof(array_size), &array_size, NULL);
-            clGetImageInfo(val, CL_IMAGE_SLICE_PITCH, sizeof(slice_pitch), &slice_pitch, NULL);
-            assert(depth == 0);
-            assert(array_size == 0);
-            assert(slice_pitch == 0);
-
-            clGetImageInfo(val, CL_IMAGE_BUFFER, sizeof(buf), &buf, NULL);
-            size_t sz = 0;
-            if (buf != NULL) clGetMemObjectInfo(buf, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
-            printf(" image %zu x %zu rp %zu @ %p buffer %zu", width, height, row_pitch, buf, sz);
-          } else {
-            size_t sz;
-            clGetMemObjectInfo(val, CL_MEM_SIZE, sizeof(sz), &sz, NULL);
-            printf(" buffer %zu", sz);
-          }
-        }
-      }
-      printf("\n");
-    }
-  }
-}
-
-cl_int thneed_clSetKernelArg(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void *arg_value) {
-  g_args_size[make_pair(kernel, arg_index)] = arg_size;
-  if (arg_value != NULL) {
-    g_args[make_pair(kernel, arg_index)] = string((char*)arg_value, arg_size);
-  } else {
-    g_args[make_pair(kernel, arg_index)] = string("");
-  }
-  cl_int ret = clSetKernelArg(kernel, arg_index, arg_size, arg_value);
-  return ret;
-}
@@ -1,32 +0,0 @@
-#include "selfdrive/modeld/thneed/thneed.h"
-
-#include <cassert>
-
-#include "common/clutil.h"
-#include "common/timing.h"
-
-Thneed::Thneed(bool do_clinit, cl_context _context) {
-  context = _context;
-  if (do_clinit) clinit();
-  char *thneed_debug_env = getenv("THNEED_DEBUG");
-  debug = (thneed_debug_env != NULL) ? atoi(thneed_debug_env) : 0;
-}
-
-void Thneed::execute(float **finputs, float *foutput, bool slow) {
-  uint64_t tb, te;
-  if (debug >= 1) tb = nanos_since_boot();
-
-  // ****** copy inputs
-  copy_inputs(finputs);
-
-  // ****** run commands
-  clexec();
-
-  // ****** copy outputs
-  copy_output(foutput);
-
-  if (debug >= 1) {
-    te = nanos_since_boot();
-    printf("model exec in %lu us\n", (te-tb)/1000);
-  }
-}
@@ -1,258 +0,0 @@
-#include "selfdrive/modeld/thneed/thneed.h"
-
-#include <dlfcn.h>
-#include <sys/mman.h>
-
-#include <cassert>
-#include <cerrno>
-#include <cstring>
-#include <map>
-#include <string>
-
-#include "common/clutil.h"
-#include "common/timing.h"
-
-Thneed *g_thneed = NULL;
-int g_fd = -1;
-
-void hexdump(uint8_t *d, int len) {
-  assert((len%4) == 0);
-  printf("  dumping %p len 0x%x\n", d, len);
-  for (int i = 0; i < len/4; i++) {
-    if (i != 0 && (i%0x10) == 0) printf("\n");
-    printf("%8x ", d[i]);
-  }
-  printf("\n");
-}
-
-// *********** ioctl interceptor ***********
-
-extern "C" {
-
-int (*my_ioctl)(int filedes, unsigned long request, void *argp) = NULL;
-#undef ioctl
-int ioctl(int filedes, unsigned long request, void *argp) {
-  request &= 0xFFFFFFFF;  // needed on QCOM2
-  if (my_ioctl == NULL) my_ioctl = reinterpret_cast<decltype(my_ioctl)>(dlsym(RTLD_NEXT, "ioctl"));
-  Thneed *thneed = g_thneed;
-
-  // save the fd
-  if (request == IOCTL_KGSL_GPUOBJ_ALLOC) g_fd = filedes;
-
-  // note that this runs always, even without a thneed object
-  if (request == IOCTL_KGSL_DRAWCTXT_CREATE) {
-    struct kgsl_drawctxt_create *create = (struct kgsl_drawctxt_create *)argp;
-    create->flags &= ~KGSL_CONTEXT_PRIORITY_MASK;
-    create->flags |= 6 << KGSL_CONTEXT_PRIORITY_SHIFT;   // priority from 1-15, 1 is max priority
-    printf("IOCTL_KGSL_DRAWCTXT_CREATE: creating context with flags 0x%x\n", create->flags);
-  }
-
-  if (thneed != NULL) {
-    if (request == IOCTL_KGSL_GPU_COMMAND) {
-      struct kgsl_gpu_command *cmd = (struct kgsl_gpu_command *)argp;
-      if (thneed->record) {
-        thneed->timestamp = cmd->timestamp;
-        thneed->context_id = cmd->context_id;
-        thneed->cmds.push_back(unique_ptr<CachedCommand>(new CachedCommand(thneed, cmd)));
-      }
-      if (thneed->debug >= 1) {
-        printf("IOCTL_KGSL_GPU_COMMAND(%2zu): flags: 0x%lx    context_id: %u  timestamp: %u  numcmds: %d  numobjs: %d\n",
-            thneed->cmds.size(),
-            cmd->flags,
-            cmd->context_id, cmd->timestamp, cmd->numcmds, cmd->numobjs);
-      }
-    } else if (request == IOCTL_KGSL_GPUOBJ_SYNC) {
-      struct kgsl_gpuobj_sync *cmd = (struct kgsl_gpuobj_sync *)argp;
-      struct kgsl_gpuobj_sync_obj *objs = (struct kgsl_gpuobj_sync_obj *)(cmd->objs);
-
-      if (thneed->debug >= 2) {
-        printf("IOCTL_KGSL_GPUOBJ_SYNC count:%d ", cmd->count);
-        for (int i = 0; i < cmd->count; i++) {
-          printf(" -- offset:0x%lx len:0x%lx id:%d op:%d  ", objs[i].offset, objs[i].length, objs[i].id, objs[i].op);
-        }
-        printf("\n");
-      }
-
-      if (thneed->record) {
-        thneed->cmds.push_back(unique_ptr<CachedSync>(new
-              CachedSync(thneed, string((char *)objs, sizeof(struct kgsl_gpuobj_sync_obj)*cmd->count))));
-      }
-    } else if (request == IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID) {
-      struct kgsl_device_waittimestamp_ctxtid *cmd = (struct kgsl_device_waittimestamp_ctxtid *)argp;
-      if (thneed->debug >= 1) {
-        printf("IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID: context_id: %d  timestamp: %d  timeout: %d\n",
-            cmd->context_id, cmd->timestamp, cmd->timeout);
-      }
-    } else if (request == IOCTL_KGSL_SETPROPERTY) {
-      if (thneed->debug >= 1) {
-        struct kgsl_device_getproperty *prop = (struct kgsl_device_getproperty *)argp;
-        printf("IOCTL_KGSL_SETPROPERTY: 0x%x sizebytes:%zu\n", prop->type, prop->sizebytes);
-        if (thneed->debug >= 2) {
-          hexdump((uint8_t *)prop->value, prop->sizebytes);
-          if (prop->type == KGSL_PROP_PWR_CONSTRAINT) {
-            struct kgsl_device_constraint *constraint = (struct kgsl_device_constraint *)prop->value;
-            hexdump((uint8_t *)constraint->data, constraint->size);
-          }
-        }
-      }
-    } else if (request == IOCTL_KGSL_DRAWCTXT_CREATE || request == IOCTL_KGSL_DRAWCTXT_DESTROY) {
-      // this happens
-    } else if (request == IOCTL_KGSL_GPUOBJ_ALLOC || request == IOCTL_KGSL_GPUOBJ_FREE) {
-      // this happens
-    } else {
-      if (thneed->debug >= 1) {
-        printf("other ioctl %lx\n", request);
-      }
-    }
-  }
-
-  int ret = my_ioctl(filedes, request, argp);
-  // NOTE: This error message goes into stdout and messes up pyenv
-  // if (ret != 0) printf("ioctl returned %d with errno %d\n", ret, errno);
-  return ret;
-}
-
-}
-
-// *********** GPUMalloc ***********
-
-GPUMalloc::GPUMalloc(int size, int fd) {
-  struct kgsl_gpuobj_alloc alloc;
-  memset(&alloc, 0, sizeof(alloc));
-  alloc.size = size;
-  alloc.flags = 0x10000a00;
-  ioctl(fd, IOCTL_KGSL_GPUOBJ_ALLOC, &alloc);
-  void *addr = mmap64(NULL, alloc.mmapsize, 0x3, 0x1, fd, alloc.id*0x1000);
-  assert(addr != MAP_FAILED);
-
-  base = (uint64_t)addr;
-  remaining = size;
-}
-
-GPUMalloc::~GPUMalloc() {
-  // TODO: free the GPU malloced area
-}
-
-void *GPUMalloc::alloc(int size) {
-  void *ret = (void*)base;
-  size = (size+0xff) & (~0xFF);
-  assert(size <= remaining);
-  remaining -= size;
-  base += size;
-  return ret;
-}
-
-// *********** CachedSync, at the ioctl layer ***********
-
-void CachedSync::exec() {
-  struct kgsl_gpuobj_sync cmd;
-
-  cmd.objs = (uint64_t)data.data();
-  cmd.obj_len = data.length();
-  cmd.count = data.length() / sizeof(struct kgsl_gpuobj_sync_obj);
-
-  int ret = ioctl(thneed->fd, IOCTL_KGSL_GPUOBJ_SYNC, &cmd);
-  assert(ret == 0);
-}
-
-// *********** CachedCommand, at the ioctl layer ***********
-
-CachedCommand::CachedCommand(Thneed *lthneed, struct kgsl_gpu_command *cmd) {
-  thneed = lthneed;
-  assert(cmd->numsyncs == 0);
-
-  memcpy(&cache, cmd, sizeof(cache));
-
-  if (cmd->numcmds > 0) {
-    cmds = make_unique<struct kgsl_command_object[]>(cmd->numcmds);
-    memcpy(cmds.get(), (void *)cmd->cmdlist, sizeof(struct kgsl_command_object)*cmd->numcmds);
-    cache.cmdlist = (uint64_t)cmds.get();
-    for (int i = 0; i < cmd->numcmds; i++) {
-      void *nn = thneed->ram->alloc(cmds[i].size);
-      memcpy(nn, (void*)cmds[i].gpuaddr, cmds[i].size);
-      cmds[i].gpuaddr = (uint64_t)nn;
-    }
-  }
-
-  if (cmd->numobjs > 0) {
-    objs = make_unique<struct kgsl_command_object[]>(cmd->numobjs);
-    memcpy(objs.get(), (void *)cmd->objlist, sizeof(struct kgsl_command_object)*cmd->numobjs);
-    cache.objlist = (uint64_t)objs.get();
-    for (int i = 0; i < cmd->numobjs; i++) {
-      void *nn = thneed->ram->alloc(objs[i].size);
-      memset(nn, 0, objs[i].size);
-      objs[i].gpuaddr = (uint64_t)nn;
-    }
-  }
-
-  kq = thneed->ckq;
-  thneed->ckq.clear();
-}
-
-void CachedCommand::exec() {
-  cache.timestamp = ++thneed->timestamp;
-  int ret = ioctl(thneed->fd, IOCTL_KGSL_GPU_COMMAND, &cache);
-
-  if (thneed->debug >= 1) printf("CachedCommand::exec got %d\n", ret);
-
-  if (thneed->debug >= 2) {
-    for (auto &it : kq) {
-      it->debug_print(false);
-    }
-  }
-
-  assert(ret == 0);
-}
-
-// *********** Thneed ***********
-
-Thneed::Thneed(bool do_clinit, cl_context _context) {
-  // TODO: QCOM2 actually requires a different context
-  //context = _context;
-  if (do_clinit) clinit();
-  assert(g_fd != -1);
-  fd = g_fd;
-  ram = make_unique<GPUMalloc>(0x80000, fd);
-  timestamp = -1;
-  g_thneed = this;
-  char *thneed_debug_env = getenv("THNEED_DEBUG");
-  debug = (thneed_debug_env != NULL) ? atoi(thneed_debug_env) : 0;
-}
-
-void Thneed::wait() {
-  struct kgsl_device_waittimestamp_ctxtid wait;
-  wait.context_id = context_id;
-  wait.timestamp = timestamp;
-  wait.timeout = -1;
-
-  uint64_t tb = nanos_since_boot();
-  int wret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
-  uint64_t te = nanos_since_boot();
-
-  if (debug >= 1) printf("wait %d after %lu us\n", wret, (te-tb)/1000);
-}
-
-void Thneed::execute(float **finputs, float *foutput, bool slow) {
-  uint64_t tb, te;
-  if (debug >= 1) tb = nanos_since_boot();
-
-  // ****** copy inputs
-  copy_inputs(finputs, true);
-
-  // ****** run commands
-  int i = 0;
-  for (auto &it : cmds) {
-    ++i;
-    if (debug >= 1) printf("run %2d @ %7lu us: ", i, (nanos_since_boot()-tb)/1000);
-    it->exec();
-    if ((i == cmds.size()) || slow) wait();
-  }
-
-  // ****** copy outputs
-  copy_output(foutput);
-
-  if (debug >= 1) {
-    te = nanos_since_boot();
-    printf("model exec in %lu us\n", (te-tb)/1000);
-  }
-}
@@ -36,7 +36,7 @@ PandaUsbHandle::PandaUsbHandle(std::string serial) : PandaCommsHandle(serial) {
  for (size_t i = 0; i < num_devices; ++i) {
    libusb_device_descriptor desc;
    libusb_get_device_descriptor(dev_list[i], &desc);
-    if (desc.idVendor == 0xbbaa && desc.idProduct == 0xddcc) {
+    if (desc.idVendor == 0x3801 && desc.idProduct == 0xddcc) {
      int ret = libusb_open(dev_list[i], &dev_handle);
      if (dev_handle == NULL || ret < 0) { goto fail; }

@@ -110,7 +110,7 @@ std::vector<std::string> PandaUsbHandle::list() {
    libusb_device *device = dev_list[i];
    libusb_device_descriptor desc;
    libusb_get_device_descriptor(device, &desc);
-    if (desc.idVendor == 0xbbaa && desc.idProduct == 0xddcc) {
+    if (desc.idVendor == 0x3801 && desc.idProduct == 0xddcc) {
      libusb_device_handle *handle = NULL;
      int ret = libusb_open(device, &handle);
      if (ret < 0) { goto finish; }
@@ -416,6 +416,7 @@ void process_peripheral_state(Panda *panda, PubMaster *pm, bool no_fan_control)

    if (ir_pwr != prev_ir_pwr || sm.frame % 100 == 0 || ir_pwr >= 50.0) {
      panda->set_ir_pwr(ir_pwr);
+      Hardware::set_ir_power(ir_pwr);
      prev_ir_pwr = ir_pwr;
    }
  }
@@ -1 +1 @@
-fakedata/
+!fakedata/*
@@ -107,7 +107,7 @@ def migrate_longitudinalPlan(msgs):
    if msg.which() != 'longitudinalPlan':
      continue
    new_msg = msg.as_builder()
-    new_msg.longitudinalPlan.aTarget, new_msg.longitudinalPlan.shouldStop = get_accel_from_plan(CP, msg.longitudinalPlan.speeds, msg.longitudinalPlan.accels)
+    new_msg.longitudinalPlan.aTarget, new_msg.longitudinalPlan.shouldStop = get_accel_from_plan(msg.longitudinalPlan.speeds, msg.longitudinalPlan.accels)
    ops.append((index, new_msg.as_reader()))
  return ops, [], []

@@ -1 +1 @@
-255ceb08c75bc85379da5ec247e612be3716fb43
+a20d7a427906c43549cf58b7a859f264aabdf28d
@@ -16,6 +16,8 @@ from openpilot.selfdrive.test.process_replay.process_replay import CONFIGS, PROC
 from openpilot.tools.lib.filereader import FileReader
 from openpilot.tools.lib.logreader import LogReader, save_log

+IS_AZURE_TOKEN_DEFINED = os.getenv("AZURE_TOKEN")
+
 source_segments = [
  ("BODY", "937ccb7243511b65|2022-05-24--16-03-09--1"),        # COMMA.COMMA_BODY
  ("HYUNDAI", "02c45f73a2e5c6e9|2021-01-01--19-08-22--1"),     # HYUNDAI.HYUNDAI_SONATA
@@ -67,6 +69,28 @@ REF_COMMIT_FN = os.path.join(PROC_REPLAY_DIR, "ref_commit")
 EXCLUDED_PROCS = {"modeld", "dmonitoringmodeld"}


+def preserve_only_specified_files_from_ref_commit(*commits_to_keep):
+  """Delete every entry in FAKEDATA whose name contains none of the given commit hashes.
+
+  Args:
+    *commits_to_keep: commit hash strings; an entry survives if any of them
+      is a substring of its file name. With no hashes given, every entry is removed.
+
+  Prints a one-line summary when at least one entry was removed.
+  """
+  # Decide what is stale up front, then delete in directory-listing order.
+  stale = [name for name in os.listdir(FAKEDATA)
+           if all(commit not in name for commit in commits_to_keep)]
+  for name in stale:
+    os.remove(os.path.join(FAKEDATA, name))
+  if stale:
+    print(f"Removed {len(stale)} old files from {FAKEDATA}")
+
+
+def handle_output_file(cur_log_fn, local):
+  """Handle the output file based on whether we're using remote or local storage.
+
+  Args:
+    cur_log_fn: path of the freshly written log; it must exist.
+    local: when truthy, stage the file with `git add` and leave it on disk;
+      otherwise upload it with `upload_file` and delete the local copy.
+
+  Raises:
+    AssertionError: if `cur_log_fn` does not exist.
+  """
+  assert os.path.exists(cur_log_fn), f"Cannot find log to upload: {cur_log_fn}"
+
+  if local:
+    # Function-scope import keeps this block self-contained.
+    import subprocess
+
+    # Pass an argv list instead of a shell string so paths containing quotes or
+    # spaces are handled correctly. Staging stays best-effort (a failure is
+    # reported, not raised), matching the previous os.system behaviour.
+    try:
+      subprocess.run(["git", "add", "--", os.path.realpath(cur_log_fn)], check=True)
+    except (OSError, subprocess.CalledProcessError) as e:
+      print(f"Warning: failed to git add {cur_log_fn}: {e}")
+  else:
+    upload_file(cur_log_fn, os.path.basename(cur_log_fn))
+    os.remove(cur_log_fn)
+
+
 def run_test_process(data):
  segment, cfg, args, cur_log_fn, ref_log_path, lr_dat = data
  res = None
@@ -77,10 +101,9 @@ def run_test_process(data):
    save_log(cur_log_fn, log_msgs)

  if args.update_refs or args.upload_only:
-    print(f'Uploading: {os.path.basename(cur_log_fn)}')
-    assert os.path.exists(cur_log_fn), f"Cannot find log to upload: {cur_log_fn}"
-    upload_file(cur_log_fn, os.path.basename(cur_log_fn))
-    os.remove(cur_log_fn)
+    print(f'Processing: {os.path.basename(cur_log_fn)}')
+    handle_output_file(cur_log_fn, args.local)
+
  return (segment, cfg.proc_name, res)


@@ -119,6 +142,27 @@ def test_process(cfg, lr, segment, ref_log_path, new_log_path, ignore_fields=Non
    return str(e), log_msgs


+def finalize_git_updates(cur_commit, ref_commit_fn):
+  """Stage the refreshed reference logs, prune stale ones and commit the result.
+
+  Args:
+    cur_commit: hash of the commit whose logs become the new reference; its first
+      seven characters are used in the commit message.
+    ref_commit_fn: path of the ref_commit file to stage.
+
+  Any failure (a git command exiting non-zero, git missing, a file that cannot be
+  removed) is printed and swallowed; the success message is printed only when every
+  step, including the commit itself, succeeded.
+  """
+  import subprocess  # local import: only needed for the git-tracked flow
+
+  fakedata_dir = os.path.realpath(FAKEDATA)
+
+  def git(*git_args):
+    # check=True turns a non-zero git exit status into CalledProcessError, which
+    # os.system() never did - without it the except branch below is unreachable.
+    subprocess.run(["git", *git_args], check=True)
+
+  try:
+    # Add all new files first
+    git("add", "--", os.path.realpath(ref_commit_fn))
+    # git expands the wildcard pathspec itself, so no shell is needed for the glob
+    git("add", "--", os.path.join(fakedata_dir, "*.zst"))
+
+    # Clean up old files - keep only new ref files since they're becoming the reference
+    preserve_only_specified_files_from_ref_commit(cur_commit)
+
+    # Add the deletions to git
+    git("add", "-u", "--", fakedata_dir)
+
+    # Create the commit
+    git("commit", "-m", f"test_processes: update ref logs to {cur_commit[:7]}")
+    print("Successfully committed reference log updates")
+  except Exception as e:
+    print(f"Failed to commit changes: {e}")
+
+
 if __name__ == "__main__":
  all_cars = {car for car, _ in segments}
  all_procs = {cfg.proc_name for cfg in CONFIGS if cfg.proc_name not in EXCLUDED_PROCS}
@@ -142,6 +186,8 @@ if __name__ == "__main__":
                      help="Updates reference logs using current commit")
  parser.add_argument("--upload-only", action="store_true",
                      help="Skips testing processes and uploads logs from previous test run")
+  parser.add_argument("--local", action="store_true",
+                      help="Use  local git/ storage instead of remote (Azure for Comma)")
  parser.add_argument("-j", "--jobs", type=int, default=max(cpu_count - 2, 1),
                      help="Max amount of parallel jobs")
  args = parser.parse_args()
@@ -168,6 +214,16 @@ if __name__ == "__main__":
  if not cur_commit:
    raise Exception("Couldn't get current commit")

+  # Could be set as default in args, but wanted to be more explicit on the flow.
+  if upload and not args.local and not IS_AZURE_TOKEN_DEFINED:
+    print("***** Warning: local/git run was used by default since AZURE_TOKEN was NOT found on the env variables! *****")
+    args.local = True
+
+  # Clean up old files before starting
+  if upload and args.local:
+    print("***** Cleaning up old fakedata for local/git tracked refs *****")
+    preserve_only_specified_files_from_ref_commit(cur_commit, ref_commit)
+
  print(f"***** testing against commit {ref_commit} *****")

  # check to make sure all car brands are tested
@@ -234,4 +290,8 @@ if __name__ == "__main__":
      f.write(cur_commit)
    print(f"\n\nUpdated reference logs for commit: {cur_commit}")

+    # Only do git operations if we're in local mode
+    if upload and args.local:
+      finalize_git_updates(cur_commit, REF_COMMIT_FN)
+
  sys.exit(int(failed))
@@ -36,7 +36,7 @@ CPU usage budget
 TEST_DURATION = 25
 LOG_OFFSET = 8

-MAX_TOTAL_CPU = 265.  # total for all 8 cores
+MAX_TOTAL_CPU = 275.  # total for all 8 cores
 PROCS = {
  # Baseline CPU usage by process
  "selfdrive.controls.controlsd": 16.0,
@@ -50,8 +50,8 @@ PROCS = {
  "selfdrive.locationd.paramsd": 9.0,
  "./sensord": 7.0,
  "selfdrive.controls.radard": 2.0,
-  "selfdrive.modeld.modeld": 17.0,
-  "selfdrive.modeld.dmonitoringmodeld": 11.0,
+  "selfdrive.modeld.modeld": 22.0,
+  "selfdrive.modeld.dmonitoringmodeld": 21.0,
  "system.hardware.hardwared": 4.0,
  "selfdrive.locationd.calibrationd": 2.0,
  "selfdrive.locationd.torqued": 5.0,
@@ -371,13 +371,14 @@ class TestOnroad:
    result += "------------------------------------------------\n"
    result += "----------------- Model Timing -----------------\n"
    result += "------------------------------------------------\n"
-    # TODO: this went up when plannerd cpu usage increased, why?
    cfgs = [
-      ("modelV2", 0.050, 0.036),
-      ("driverStateV2", 0.050, 0.026),
+      ("modelV2", 0.045, 0.035),
+      ("driverStateV2", 0.045, 0.035),
    ]
    for (s, instant_max, avg_max) in cfgs:
      ts = [getattr(m, s).modelExecutionTime for m in self.msgs[s]]
+      # TODO some init can happen in first iteration
+      ts = ts[1:]
      assert max(ts) < instant_max, f"high '{s}' execution time: {max(ts)}"
      assert np.mean(ts) < avg_max, f"high avg '{s}' execution time: {np.mean(ts)}"
      result += f"'{s}' execution time: min  {min(ts):.5f}s\n"
@@ -67,14 +67,13 @@ if GetOption('extras'):
  qt_src.remove("main.cc")  # replaced by test_runner
  qt_env.Program('tests/test_translations', [asset_obj, 'tests/test_runner.cc', 'tests/test_translations.cc'] + qt_src, LIBS=qt_libs)

-if GetOption('extras') and arch != "Darwin":
+if GetOption('extras'):
  qt_env.SharedLibrary("qt/python_helpers", ["qt/qt_window.cc"], LIBS=qt_libs)

  # spinner and text window
  qt_env.Program("_text", ["qt/text.cc"], LIBS=qt_libs)
  qt_env.Program("_spinner", ["qt/spinner.cc"], LIBS=qt_libs)

-
  # setup and factory resetter
  qt_env.Program("qt/setup/reset", ["qt/setup/reset.cc"], LIBS=qt_libs)
  qt_env.Program("qt/setup/setup", ["qt/setup/setup.cc", asset_obj],
@@ -83,29 +82,30 @@ if GetOption('extras') and arch != "Darwin":
  # build updater UI
  qt_env.Program("qt/setup/updater", ["qt/setup/updater.cc", asset_obj], LIBS=qt_libs)

-  # build installers
-  senv = qt_env.Clone()
-  senv['LINKFLAGS'].append('-Wl,-strip-debug')
+  if arch != "Darwin":
+    # build installers
+    senv = qt_env.Clone()
+    senv['LINKFLAGS'].append('-Wl,-strip-debug')

-  release = "release3"
-  installers = [
-    ("openpilot", release),
-    ("openpilot_test", f"{release}-staging"),
-    ("openpilot_nightly", "nightly"),
-    ("openpilot_internal", "nightly-dev"),
-  ]
+    release = "release3"
+    installers = [
+      ("openpilot", release),
+      ("openpilot_test", f"{release}-staging"),
+      ("openpilot_nightly", "nightly"),
+      ("openpilot_internal", "nightly-dev"),
+    ]

-  cont = senv.Command(f"installer/continue_openpilot.o", f"installer/continue_openpilot.sh",
-                      "ld -r -b binary -o $TARGET $SOURCE")
-  for name, branch in installers:
-    d = {'BRANCH': f"'\"{branch}\"'"}
-    if "internal" in name:
-      d['INTERNAL'] = "1"
+    cont = senv.Command(f"installer/continue_openpilot.o", f"installer/continue_openpilot.sh",
+                        "ld -r -b binary -o $TARGET $SOURCE")
+    for name, branch in installers:
+      d = {'BRANCH': f"'\"{branch}\"'"}
+      if "internal" in name:
+        d['INTERNAL'] = "1"

-    obj = senv.Object(f"installer/installers/installer_{name}.o", ["installer/installer.cc"], CPPDEFINES=d)
-    f = senv.Program(f"installer/installers/installer_{name}", [obj, cont], LIBS=qt_libs)
-    # keep installers small
-    assert f[0].get_size() < 370*1e3
+      obj = senv.Object(f"installer/installers/installer_{name}.o", ["installer/installer.cc"], CPPDEFINES=d)
+      f = senv.Program(f"installer/installers/installer_{name}", [obj, cont], LIBS=qt_libs)
+      # keep installers small
+      assert f[0].get_size() < 370*1e3

 # build watch3
 if arch in ['x86_64', 'aarch64', 'Darwin'] or GetOption('extras'):
@@ -203,7 +203,7 @@ void WifiManager::connect(const Network &n, const bool is_hidden, const QString
  connection["ipv4"]["dns-priority"] = 600;
  connection["ipv6"]["method"] = "ignore";

-  call(NM_DBUS_PATH_SETTINGS, NM_DBUS_INTERFACE_SETTINGS, "AddConnection", QVariant::fromValue(connection));
+  asyncCall(NM_DBUS_PATH_SETTINGS, NM_DBUS_INTERFACE_SETTINGS, "AddConnection", QVariant::fromValue(connection));
 }

 void WifiManager::deactivateConnectionBySsid(const QString &ssid) {
@@ -330,6 +330,10 @@ void WifiManager::initConnections() {
      lteConnectionPath = path;
    }
  }
+
+  if (!isKnownConnection(tethering_ssid)) {
+    addTetheringConnection();
+  }
 }

 std::optional<QDBusPendingCall> WifiManager::activateWifiConnection(const QString &ssid) {
@@ -399,9 +403,13 @@ void WifiManager::updateGsmSettings(bool roaming, QString apn, bool metered) {
    }

    if (changes) {
-      call(lteConnectionPath.path(), NM_DBUS_INTERFACE_SETTINGS_CONNECTION, "UpdateUnsaved", QVariant::fromValue(settings));  // update is temporary
-      deactivateConnection(lteConnectionPath);
-      activateModemConnection(lteConnectionPath);
+      QDBusPendingCall pending_call = asyncCall(lteConnectionPath.path(), NM_DBUS_INTERFACE_SETTINGS_CONNECTION, "UpdateUnsaved", QVariant::fromValue(settings));  // update is temporary
+      QDBusPendingCallWatcher *watcher = new QDBusPendingCallWatcher(pending_call);
+      QObject::connect(watcher, &QDBusPendingCallWatcher::finished, this, [this, watcher]() {
+        deactivateConnection(lteConnectionPath);
+        activateModemConnection(lteConnectionPath);
+        watcher->deleteLater();
+      });
    }
  }
 }
@@ -434,10 +442,7 @@ void WifiManager::addTetheringConnection() {
  connection["ipv4"]["route-metric"] = 1100;
  connection["ipv6"]["method"] = "ignore";

-  auto path = call<QDBusObjectPath>(NM_DBUS_PATH_SETTINGS, NM_DBUS_INTERFACE_SETTINGS, "AddConnection", QVariant::fromValue(connection));
-  if (!path.path().isEmpty()) {
-    knownConnections[path] = tethering_ssid;
-  }
+  asyncCall(NM_DBUS_PATH_SETTINGS, NM_DBUS_INTERFACE_SETTINGS, "AddConnection", QVariant::fromValue(connection));
 }

 void WifiManager::tetheringActivated(QDBusPendingCallWatcher *call) {
@@ -453,10 +458,6 @@ void WifiManager::tetheringActivated(QDBusPendingCallWatcher *call) {

 void WifiManager::setTetheringEnabled(bool enabled) {
  if (enabled) {
-    if (!isKnownConnection(tethering_ssid)) {
-      addTetheringConnection();
-    }
-
    auto pending_call = activateWifiConnection(tethering_ssid);

    if (pending_call) {
@@ -478,9 +479,6 @@ bool WifiManager::isTetheringEnabled() {
 }

 QString WifiManager::getTetheringPassword() {
-  if (!isKnownConnection(tethering_ssid)) {
-    addTetheringConnection();
-  }
  const QDBusObjectPath &path = getConnectionPath(tethering_ssid);
  if (!path.path().isEmpty()) {
    QDBusReply<QMap<QString, QVariantMap>> response = call(path.path(), NM_DBUS_INTERFACE_SETTINGS_CONNECTION, "GetSecrets", "802-11-wireless-security");
@@ -100,9 +100,9 @@ send_queue: Queue[str] = queue.Queue()
 upload_queue: Queue[UploadItem] = queue.Queue()
 low_priority_send_queue: Queue[str] = queue.Queue()
 log_recv_queue: Queue[str] = queue.Queue()
+cancelled_uploads: set[str] = set()

 cur_upload_items: dict[int, UploadItem | None] = {}
-cur_upload_items_lock = threading.Lock()


 def strip_zst_extension(fn: str) -> str:
@@ -130,9 +130,8 @@ class UploadQueueCache:
  @staticmethod
  def cache(upload_queue: Queue[UploadItem]) -> None:
    try:
-      with upload_queue.mutex:
-        items = [asdict(item) for item in upload_queue.queue]
-
+      queue: list[UploadItem | None] = list(upload_queue.queue)
+      items = [asdict(i) for i in queue if i is not None and (i.id not in cancelled_uploads)]
      Params().put("AthenadUploadQueue", json.dumps(items))
    except Exception:
      cloudlog.exception("athena.UploadQueueCache.cache.exception")
@@ -199,13 +198,11 @@ def retry_upload(tid: int, end_event: threading.Event, increase_count: bool = Tr
      progress=0,
      current=False
    )
-
-    with cur_upload_items_lock:
-      upload_queue.put_nowait(item)
-      cur_upload_items[tid] = None
-
+    upload_queue.put_nowait(item)
    UploadQueueCache.cache(upload_queue)

+    cur_upload_items[tid] = None
+
    for _ in range(RETRY_DELAY):
      time.sleep(1)
      if end_event.is_set():
@@ -224,8 +221,7 @@ def cb(sm, item, tid, end_event: threading.Event, sz: int, cur: int) -> None:
  if end_event.is_set():
    raise AbortTransferException

-  with cur_upload_items_lock:
-    cur_upload_items[tid] = replace(item, progress=cur / sz if sz else 1)
+  cur_upload_items[tid] = replace(item, progress=cur / sz if sz else 1)


 def upload_handler(end_event: threading.Event) -> None:
@@ -233,10 +229,14 @@ def upload_handler(end_event: threading.Event) -> None:
  tid = threading.get_ident()

  while not end_event.is_set():
+    cur_upload_items[tid] = None
+
    try:
-      with cur_upload_items_lock:
-        cur_upload_items[tid] = None
-        cur_upload_items[tid] = item = replace(upload_queue.get(timeout=1), current=True)
+      cur_upload_items[tid] = item = replace(upload_queue.get(timeout=1), current=True)
+
+      if item.id in cancelled_uploads:
+        cancelled_uploads.remove(item.id)
+        continue

      # Remove item if too old
      age = datetime.now() - datetime.fromtimestamp(item.created_at / 1000)
@@ -415,10 +415,8 @@ def uploadFilesToUrls(files_data: list[UploadFileDict]) -> UploadFilesToUrlRespo

@dispatcher.add_method
 def listUploadQueue() -> list[UploadItemDict]:
-  with cur_upload_items_lock, upload_queue.mutex:
-    items = list(upload_queue.queue) + [item for item in cur_upload_items.values() if item is not None]
-
-  return [asdict(item) for item in items]
+  items = list(upload_queue.queue) + list(cur_upload_items.values())
+  return [asdict(i) for i in items if (i is not None) and (i.id not in cancelled_uploads)]


@dispatcher.add_method
@@ -426,14 +424,13 @@ def cancelUpload(upload_id: str | list[str]) -> dict[str, int | str]:
  if not isinstance(upload_id, list):
    upload_id = [upload_id]

-  with upload_queue.mutex:
-    remaining_items = [item for item in upload_queue.queue if item.id not in upload_id]
-    if len(remaining_items) == len(upload_queue.queue):
-      return {"success": 0, "error": "not found"}
+  uploading_ids = {item.id for item in list(upload_queue.queue)}
+  cancelled_ids = uploading_ids.intersection(upload_id)
+  if len(cancelled_ids) == 0:
+    return {"success": 0, "error": "not found"}

-    upload_queue.queue.clear()
-    upload_queue.queue.extend(remaining_items)
-    return {"success": 1}
+  cancelled_uploads.update(cancelled_ids)
+  return {"success": 1}

@dispatcher.add_method
 def setRouteViewed(route: str) -> dict[str, int | str]:
@@ -78,6 +78,7 @@ class TestAthenadMethods:

    athenad.upload_queue = queue.Queue()
    athenad.cur_upload_items.clear()
+    athenad.cancelled_uploads.clear()

    for i in os.listdir(Paths.log_root()):
      p = os.path.join(Paths.log_root(), i)
@@ -281,10 +282,13 @@ class TestAthenadMethods:
    athenad.upload_queue.put_nowait(item)
    dispatcher["cancelUpload"](item.id)

+    assert item.id in athenad.cancelled_uploads
+
    self._wait_for_upload()
    time.sleep(0.1)

    assert athenad.upload_queue.qsize() == 0
+    assert len(athenad.cancelled_uploads) == 0

  @with_upload_handler
  def test_cancel_expiry(self):
@@ -327,7 +331,7 @@ class TestAthenadMethods:
    assert items[0] == asdict(item)
    assert not items[0]['current']

-    dispatcher["cancelUpload"](item.id)
+    athenad.cancelled_uploads.add(item.id)
    items = dispatcher["listUploadQueue"]()
    assert len(items) == 0

@@ -339,7 +343,7 @@ class TestAthenadMethods:
    athenad.upload_queue.put_nowait(item2)

    # Ensure canceled items are not persisted
-    dispatcher["cancelUpload"](item2.id)
+    athenad.cancelled_uploads.add(item2.id)

    # serialize item
    athenad.UploadQueueCache.cache(athenad.upload_queue)
@@ -89,12 +89,14 @@ void CameraBuf::init(cl_device_id device_id, cl_context context, SpectraCamera *
  vipc_server->create_buffers_with_sizes(stream_type, VIPC_BUFFER_COUNT, out_img_width, out_img_height, nv12_size, cam->stride, cam->uv_offset);
  LOGD("created %d YUV vipc buffers with size %dx%d", VIPC_BUFFER_COUNT, cam->stride, cam->y_height);

-  imgproc = new ImgProc(device_id, context, this, sensor, cam->cc.camera_num, cam->stride, cam->uv_offset);
+  if (is_raw) imgproc = new ImgProc(device_id, context, this, sensor, cam->cc.camera_num, cam->stride, cam->uv_offset);
 }

 CameraBuf::~CameraBuf() {
-  for (int i = 0; i < frame_buf_count; i++) {
-    camera_bufs_raw[i].free();
+  if (camera_bufs_raw != nullptr) {
+    for (int i = 0; i < frame_buf_count; i++) {
+      camera_bufs_raw[i].free();
+    }
  }
  if (imgproc) delete imgproc;
 }
@@ -55,7 +55,7 @@ public:

  float fl_pix = 0;

-  CameraState(SpectraMaster *master, const CameraConfig &config) : camera(master, config, true /*config.stream_type == VISION_STREAM_ROAD*/) {};
+  CameraState(SpectraMaster *master, const CameraConfig &config) : camera(master, config, config.stream_type == VISION_STREAM_ROAD) {};
  ~CameraState();
  void init(VisionIpcServer *v, cl_device_id device_id, cl_context ctx);
  void update_exposure_score(float desired_ev, int exp_t, int exp_g_idx, float exp_gain);
@@ -14,7 +14,7 @@ int write_dmi(uint8_t *dst, uint64_t *addr, uint32_t length, uint32_t dmi_addr,
  return sizeof(struct cdm_dmi_cmd);
 }

-int write_cont(uint8_t *dst, uint32_t reg, std::vector<uint32_t> vals) {
+int write_cont(uint8_t *dst, uint32_t reg, const std::vector<uint32_t> &vals) {
  struct cdm_regcontinuous_cmd *cmd = (struct cdm_regcontinuous_cmd*)dst;
  cmd->cmd = CAM_CDM_CMD_REG_CONT;
  cmd->count = vals.size();
@@ -31,7 +31,7 @@ int write_cont(uint8_t *dst, uint32_t reg, std::vector<uint32_t> vals) {
  return sizeof(struct cdm_regcontinuous_cmd) + vals.size()*sizeof(uint32_t);
 }

-int write_random(uint8_t *dst, std::vector<uint32_t> vals) {
+int write_random(uint8_t *dst, const std::vector<uint32_t> &vals) {
  struct cdm_regrandom_cmd *cmd = (struct cdm_regrandom_cmd*)dst;
  cmd->cmd = CAM_CDM_CMD_REG_RANDOM;
  cmd->count = vals.size() / 2;
@@ -7,8 +7,8 @@
 #include <memory>

 // our helpers
-int write_random(uint8_t *dst, std::vector<uint32_t> vals);
-int write_cont(uint8_t *dst, uint32_t reg, std::vector<uint32_t> vals);
+int write_random(uint8_t *dst, const std::vector<uint32_t> &vals);
+int write_cont(uint8_t *dst, uint32_t reg, const std::vector<uint32_t> &vals);
 int write_dmi(uint8_t *dst, uint64_t *addr, uint32_t length, uint32_t dmi_addr, uint8_t sel);

 // from drivers/media/platform/msm/camera/cam_cdm/cam_cdm_util.{c,h}
@@ -19,6 +19,17 @@
 #endif

 float get_vignetting_s(float r) {
+#if defined(VIGNETTE_PROFILE_4DT6MM)
+  if (r < 100000) {
+    return 1.0f + 0.0000013f*r;
+  } else if (r < 250000) {
+    return 1.02f + 0.0000011f*r;
+  } else if (r < 400000) {
+    return 0.92f + 0.0000015f*r;
+  } else {
+    return 0.44f + 0.0000027f*r;
+  }
+#elif defined(VIGNETTE_PROFILE_8DT0MM)
  if (r < 62500) {
    return (1.0f + 0.0000008f*r);
  } else if (r < 490000) {
@@ -28,6 +39,9 @@ float get_vignetting_s(float r) {
  } else {
    return (0.53503625f + 0.0000000000022f*r*r);
  }
+#else
+  return 1.0f;
+#endif
 }

 int4 parse_12bit(uchar8 pvs) {
@@ -65,7 +79,7 @@ __kernel void process_raw(const __global uchar * in, __global uchar * out, int e
  #if VIGNETTING
    int gx = (gid_x*2 - RGB_WIDTH/2);
    int gy = (gid_y*2 - RGB_HEIGHT/2);
-    const float vignette_factor = get_vignetting_s((gx*gx + gy*gy) / VIGNETTE_RSZ);
+    const float vignette_factor = get_vignetting_s(gx*gx + gy*gy);
  #else
    const float vignette_factor = 1.0;
  #endif
@@ -51,12 +51,12 @@ int do_sync_control(int fd, uint32_t id, void *handle, uint32_t size) {
  };
  int ret = HANDLE_EINTR(ioctl(fd, CAM_PRIVATE_IOCTL_CMD, &arg));

-  int32_t ioctl_result = (int32_t)arg.result;
+  int32_t ioctl_result = static_cast<int32_t>(arg.result);
  if (ret < 0) {
    LOGE("CAM_SYNC error: id %u - errno %d - ret %d - ioctl_result %d", id, errno, ret, ioctl_result);
    return ret;
  }
-  if (ioctl_result < 0) {
+  if (ioctl_result != 0) {
    LOGE("CAM_SYNC error: id %u - errno %d - ret %d - ioctl_result %d", id, errno, ret, ioctl_result);
    return ioctl_result;
  }
@@ -196,9 +196,9 @@ void SpectraMaster::init() {
  assert(isp_fd >= 0);
  LOGD("opened isp");

-  //icp_fd = open_v4l_by_name_and_index("cam-icp");
-  //assert(icp_fd >= 0);
-  //LOGD("opened icp");
+  icp_fd = open_v4l_by_name_and_index("cam-icp");
+  assert(icp_fd >= 0);
+  LOGD("opened icp");

  // query ISP for MMU handles
  LOG("-- Query for MMU handles");
@@ -215,7 +215,6 @@ void SpectraMaster::init() {
  cdm_iommu = isp_query_cap_cmd.cdm_iommu.non_secure;

  // query ICP for MMU handles
-  /*
  struct cam_icp_query_cap_cmd icp_query_cap_cmd = {0};
  query_cap_cmd.caps_handle = (uint64_t)&icp_query_cap_cmd;
  query_cap_cmd.size = sizeof(icp_query_cap_cmd);
@@ -223,7 +222,6 @@ void SpectraMaster::init() {
  assert(ret == 0);
  LOGD("using ICP MMU handle: %x", icp_query_cap_cmd.dev_iommu_handle.non_secure);
  icp_device_iommu = icp_query_cap_cmd.dev_iommu_handle.non_secure;
-  */

  // subscribe
  LOG("-- Subscribing");
@@ -675,18 +673,21 @@ void SpectraCamera::enqueue_buffer(int i, bool dp) {
  uint64_t request_id = request_ids[i];

  if (sync_objs[i]) {
-    // wait
+    // SOF has come in, wait until readout is complete
    struct cam_sync_wait sync_wait = {0};
    sync_wait.sync_obj = sync_objs[i];
-    sync_wait.timeout_ms = 50; // max dt tolerance, typical should be 23
+    sync_wait.timeout_ms = 100;
    ret = do_sync_control(m->cam_sync_fd, CAM_SYNC_WAIT, &sync_wait, sizeof(sync_wait));
    if (ret != 0) {
-      LOGE("failed to wait for sync: %d %d", ret, sync_wait.sync_obj);
      // TODO: handle frame drop cleanly
+      // when this happens, it messes up future frames
+      LOGE("failed to wait for sync: %d %d", ret, sync_wait.sync_obj);
    }
    buf.frame_metadata[i].timestamp_end_of_isp = (uint64_t)nanos_since_boot();
    buf.frame_metadata[i].timestamp_eof = buf.frame_metadata[i].timestamp_sof + sensor->readout_time_ns;
-    if (dp) buf.queue(i);
+    if (dp) {
+      buf.queue(i);
+    }

    // destroy old output fence
    for (auto so : {sync_objs, sync_objs_bps_out}) {
@@ -709,13 +710,13 @@ void SpectraCamera::enqueue_buffer(int i, bool dp) {
  }
  sync_objs[i] = sync_create.sync_obj;

-  /*
-  ret = do_cam_control(m->cam_sync_fd, CAM_SYNC_CREATE, &sync_create, sizeof(sync_create));
-  if (ret != 0) {
-    LOGE("failed to create fence: %d %d", ret, sync_create.sync_obj);
+  if (icp_dev_handle > 0) {
+    ret = do_cam_control(m->cam_sync_fd, CAM_SYNC_CREATE, &sync_create, sizeof(sync_create));
+    if (ret != 0) {
+      LOGE("failed to create fence: %d %d", ret, sync_create.sync_obj);
+    }
+    sync_objs_bps_out[i] = sync_create.sync_obj;
  }
-  sync_objs_bps_out[i] = sync_create.sync_obj;
-  */

  // schedule request with camera request manager
  struct cam_req_mgr_sched_request req_mgr_sched_request = {0};
@@ -743,8 +744,10 @@ void SpectraCamera::camera_map_bufs() {
    mem_mgr_map_cmd.flags = CAM_MEM_FLAG_HW_READ_WRITE;
    mem_mgr_map_cmd.mmu_hdls[0] = m->device_iommu;
    mem_mgr_map_cmd.num_hdl = 1;
-    //mem_mgr_map_cmd.mmu_hdls[1] = m->icp_device_iommu;
-    //mem_mgr_map_cmd.num_hdl = 2;
+    if (icp_dev_handle > 0) {
+      mem_mgr_map_cmd.num_hdl = 2;
+      mem_mgr_map_cmd.mmu_hdls[1] = m->icp_device_iommu;
+    }

    if (is_raw) {
      // RAW bayer images
@@ -896,8 +899,6 @@ void SpectraCamera::configISP() {
 }

 void SpectraCamera::configICP() {
-  if (!enabled) return;
-
  /*
    Configures both the ICP and BPS.
  */
@@ -1045,6 +1046,10 @@ void SpectraCamera::camera_close() {

    // release devices
    LOGD("-- Release devices");
+    if (icp_dev_handle > 0) {
+      ret = device_control(m->icp_fd, CAM_RELEASE_DEV, session_handle, icp_dev_handle);
+      LOGD("release icp: %d", ret);
+    }
    ret = device_control(m->isp_fd, CAM_RELEASE_DEV, session_handle, isp_dev_handle);
    LOGD("release isp: %d", ret);
    ret = device_control(csiphy_fd, CAM_RELEASE_DEV, session_handle, csiphy_dev_handle);
@@ -22,8 +22,9 @@ const int MIPI_SETTLE_CNT = 33;  // Calculated by camera_freqs.py

 // CSLDeviceType/CSLPacketOpcodesIFE from camx
 // cam_packet_header.op_code = (device << 24) | (opcode);
-#define CSLDeviceTypeImageSensor (0x1 << 24)
-#define CSLDeviceTypeIFE         (0xF << 24)
+#define CSLDeviceTypeImageSensor (0x01 << 24)
+#define CSLDeviceTypeIFE         (0x0F << 24)
+#define CSLDeviceTypeBPS         (0x10 << 24)
 #define OpcodesIFEInitialConfig  0x0
 #define OpcodesIFEUpdate         0x1

@@ -1,9 +1,10 @@
 #if SENSOR_ID == 1

+#define VIGNETTE_PROFILE_8DT0MM
+
 #define BIT_DEPTH 12
 #define PV_MAX 4096
 #define BLACK_LVL 168
-#define VIGNETTE_RSZ 1.0f

 float4 normalize_pv(int4 parsed, float vignette_factor) {
  float4 pv = (convert_float4(parsed) - BLACK_LVL) / (PV_MAX - BLACK_LVL);
@@ -30,4 +31,4 @@ float3 apply_gamma(float3 rgb, int expo_time) {
    ((rk * (rgb-mp) * (gamma_k*mp+gamma_b) * (1+1/(rk*mp)) / (1-rk*(rgb-mp))) + gamma_k*mp + gamma_b);
 }

-#endif
+#endif
@@ -1,13 +1,13 @@
 #if SENSOR_ID == 3

 #define BGGR
+#define VIGNETTE_PROFILE_4DT6MM

 #define BIT_DEPTH 12
 #define PV_MAX10 1023
 #define PV_MAX12 4095
 #define PV_MAX16 65536 // gamma curve is calibrated to 16bit
 #define BLACK_LVL 48
-#define VIGNETTE_RSZ 2.2545f

 float combine_dual_pvs(float lv, float sv, int expo_time) {
  float svc = fmax(sv * expo_time, (float)(64 * (PV_MAX10 - BLACK_LVL)));
@@ -1,8 +1,9 @@
 #if SENSOR_ID == 2

+#define VIGNETTE_PROFILE_8DT0MM
+
 #define BIT_DEPTH 12
 #define BLACK_LVL 64
-#define VIGNETTE_RSZ 1.0f

 float ox_lut_func(int x) {
  if (x < 512) {
@@ -10,7 +10,7 @@ echo 0 | sudo tee /sys/module/cam_debug_util/parameters/debug_mdl
 sudo dmesg -C
 scons -u -j8 --minimal .
 export DEBUG_FRAMES=1
-#export DISABLE_ROAD=1 DISABLE_WIDE_ROAD=1
-export DISABLE_DRIVER=1
-#export LOGPRINT=debug
+export DISABLE_ROAD=1 DISABLE_WIDE_ROAD=1
+#export DISABLE_DRIVER=1
+export LOGPRINT=debug
 ./camerad
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -e
+
+cd /sys/kernel/debug/tracing
+echo "" > trace
+echo 1 > tracing_on
+#echo Y > /sys/kernel/debug/camera_icp/a5_debug_q
+echo 0x1 > /sys/kernel/debug/camera_icp/a5_debug_type
+echo 1 > /sys/kernel/debug/tracing/events/camera/enable
+echo 0xffffffff > /sys/kernel/debug/camera_icp/a5_debug_lvl
+echo 1 > /sys/kernel/debug/tracing/events/camera/cam_icp_fw_dbg/enable
+
+cat /sys/kernel/debug/tracing/trace_pipe
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
DevTekVE	117649019b	using lfs instead	2024-12-19 22:09:29 +01:00
DevTekVE	c2c8a068b0	initial data	2024-12-19 22:05:22 +01:00
DevTekVE	f15d625c9b	update submodule	2024-12-19 22:02:27 +01:00
DevTekVE	26b3182a6a	update submodule	2024-12-19 22:00:06 +01:00
DevTekVE	df2960d170	test	2024-12-19 21:57:32 +01:00
DevTekVE	0e14cf53fe	clean	2024-12-19 21:54:53 +01:00
DevTekVE	a2051d99f5	clean update gitmodule test	2024-12-19 21:54:36 +01:00
Jason Wen	cf74e6416c	Sync: `commaai/openpilot:master` into `sunnypilot/sunnypilot:master-new` (#488 )	2024-12-19 10:57:30 -05:00
devtekve	7592669d74	test_processes: update ref logs to `a20d7a4`	2024-12-19 15:46:00 +00:00
DevTekVE	a20d7a4279	Bump panda for sync	2024-12-19 16:30:02 +01:00
DevTekVE	e4fc6ffe7a	Merge remote-tracking branch 'comma/master' into sync-20241219-3 # Conflicts: # panda # selfdrive/test/process_replay/ref_commit	2024-12-19 16:29:02 +01:00
DevTekVE	cf30110d65	CI: cherry pick maxime's fix for pipelines docker (#487 ) set python upper bound to 3.13 (#34286) * try * test * ... * wow * y * docker * < (cherry picked from commit `9c9b273a3e`) Co-authored-by: Maxime Desroches <desroches.maxime@gmail.com>	2024-12-19 15:50:20 +01:00
DevTekVE	a48d43ec2b	CI: adding recurring sync to lfs (#489 ) Allows us to automatically keep our LFS in sync with comma's and also to manually perform a sync if we need to. Even able to sync the LFS from a given commit hash or a given branch. Useful for model stuff.	2024-12-19 15:32:40 +01:00
Maxime Desroches	9c9b273a3e	set python upper bound to 3.13 (#34286 ) * try * test * ... * wow * y * docker * <	2024-12-18 22:17:28 -08:00
Harald Schäfer	383893d39e	Long planner get accel: new function args (#34288 ) * Change function args * typo * typo * ref commit	2024-12-18 16:41:57 -08:00
YassineYousfi	1a7c284445	National Public Radio Model 📻 (#34259 ) * f3a009b7-dcb9-41f3-8917-6fcb3cec37bf/400 * 65f26b40-56c9-4c6c-a3ac-e1788bd52567/400	2024-12-18 16:13:47 -08:00
Lukas	af5082089e	cabana: issue filtering by addresses in FindSignal tool (#34283 ) fixed filtering by addresses issue in cabana	2024-12-18 14:11:05 -08:00
Harald Schäfer	17ca6389e1	Tinygrad runner (#34261 ) * squash * dmonitoringmodeld: use cl transform (#34235) * needs cleanup * only if tici * bump tinygrad * check width * base modelframe * . * need to be args * more cleanup * no _frame in base * tici only * its DrivingModelFrame * .6 is fair --------- Co-authored-by: Comma Device <device@comma.ai> * Update tinygrad * tg upstream * bump tg * bump tg * debug * attr * misc cleanup * whitespace * remove * Add TODOs to make python proc for modelrunners * whitespace --------- Co-authored-by: ZwX1616 <zwx1616@gmail.com> Co-authored-by: Comma Device <device@comma.ai> Co-authored-by: Maxime Desroches <desroches.maxime@gmail.com>	2024-12-18 11:58:59 -08:00
Adeeb Shihadeh	ff97a43c50	fix typo	2024-12-17 20:42:44 -08:00
ZwX1616	9c3aa2e2dc	camerad: add os04+4.6mm lsc profile (#34280 ) * draft * ifdef in cl --------- Co-authored-by: Comma Device <device@comma.ai>	2024-12-17 20:32:08 -08:00
Shane Smiskol	7ffad1935d	bump msgq (#34278 ) bump	2024-12-17 15:43:51 -08:00
Maxime Desroches	155d842a3b	set plot limits for touch events viz (#34277 ) lim	2024-12-17 15:10:13 -08:00
Dean Lee	d40fd1956d	wifimanager: use asyncCall to avoid timeout when adding connections (#34273 ) * use asyncCall to avoid timeout when adding Wi-Fi connections * use async call for addTetheringConnection * only this change --------- Co-authored-by: Shane Smiskol <shane@smiskol.com>	2024-12-17 14:48:08 -08:00
Shane Smiskol	857133635c	ui: wait for lte connection to update before reactivating (#34275 ) async	2024-12-17 14:35:43 -08:00
Shane Smiskol	f149083e4a	ui: initialize tethering connection on startup (#34274 ) * initialize hotspot connection on init * better place * fix	2024-12-17 14:25:31 -08:00
Adeeb Shihadeh	247ee2bda8	bump panda for new USB VID	2024-12-17 11:18:41 -08:00
Jason Wen	c8fe86c552	Sync: `commaai/openpilot:master` into `sunnypilot/sunnypilot:master-new` (#484 )	2024-12-17 07:04:03 -05:00
Jason Wen	952354c847	test_processes: update ref logs to `fffa98e`	2024-12-17 06:49:31 -05:00
Jason Wen	fffa98ee85	Merge branch 'upstream/master' into sync-20241712 # Conflicts: # .github/workflows/ci_weekly_run.yaml # opendbc_repo # panda # selfdrive/test/process_replay/ref_commit	2024-12-17 06:27:31 -05:00
Adeeb Shihadeh	e317485200	tici: fix device types (#34269 )	2024-12-16 19:44:43 -08:00
Shane Smiskol	3da346e2e4	AGNOS: fix update loop (#34268 ) fix update loop	2024-12-16 18:30:08 -08:00
Adeeb Shihadeh	6c1314baf9	camerad: only build debayer kernel when necessary (#34267 )	2024-12-16 17:22:59 -08:00
Maxime Desroches	71b02f8001	hardwared: non blocking read for touch events (#34263 ) * slow * slow * non blocking * 10 * try * simple * int * test * get * try * clean * read all * nested * simpler * indent * cleanup	2024-12-16 17:10:47 -08:00
Jason Wen	a984903298	Hyundai: Allow controls with Sport and Manumatic Gears (#34113 ) * Hyundai CAN: Explicitly parse gear shifter values for `EV, HEV, PHEV` * for this pr * more segments * found 4.0! * only print when spornt=4.0 is found * new outputs * bump opendbc * Update selfdrive/car/car_specific.py * delete notebook --------- Co-authored-by: Shane Smiskol <shane@smiskol.com>	2024-12-16 16:51:04 -08:00
Jason Wen	c69cd934af	Setup sunnypilot package (#483 ) Setup sunnypilot as a package	2024-12-16 19:47:29 -05:00
Adeeb Shihadeh	bedbe6fd94	agnos 11.4 (#34250 )	2024-12-16 16:46:03 -08:00
ZwX1616	7352e612a2	dmonitoringmodeld: use cl transform without tinygrad (#34266 ) * merge * why * self.buf_size * 0.05 more than with tg due to copy --------- Co-authored-by: Comma Device <device@comma.ai>	2024-12-16 16:29:06 -08:00
Jason Wen	35278ba63b	ui: Allow Qt spinner/text/setup/reset/updater to build on macOS (#34265 ) ui: Allow spinner/text/setup/reset/updater to build on macOS	2024-12-16 16:25:46 -08:00
Adeeb Shihadeh	a82116ac46	camerad: fix VisionBuf freeing (#34264 ) Co-authored-by: Comma Device <device@comma.ai>	2024-12-16 15:12:17 -08:00
Adeeb Shihadeh	b2930682ff	tici: only write eSIM connection once	2024-12-16 14:41:36 -08:00
Harald Schäfer	5018cf75ff	North America Model (#34260 ) * 0e1c9c12-0472-4a0c-8963-611ad240ec62/400 * rm outputs	2024-12-16 14:17:31 -08:00
Harald Schäfer	a98210aeec	modeld: ort helpers (#34258 ) * ort helpers * import from ort helpers * import that too * linter * linter * linter	2024-12-16 13:10:00 -08:00
commaci-public	11fb0b95d2	[bot] Update Python packages (#34256 ) Update Python packages Co-authored-by: Vehicle Researcher <user@comma.ai>	2024-12-16 10:28:17 -08:00
Edward Wang	ea444ec340	Standardize ENV=* formatting in Dockerfiles (#34253 ) fix ENV formatting	2024-12-16 09:47:32 -08:00
Mike Busuttil	cf4fae5464	CTF.md typo (#34257 ) typo	2024-12-16 09:37:29 -08:00
commaci-public	833a67b019	[bot] Update Python packages (#34251 ) Update Python packages Co-authored-by: Vehicle Researcher <user@comma.ai>	2024-12-15 21:05:29 -08:00
Adeeb Shihadeh	8558928864	add branch guide to the readme	2024-12-15 13:36:51 -08:00
Adeeb Shihadeh	df2bf83846	op/switch: more robust switching	2024-12-15 09:39:23 -08:00
Adeeb Shihadeh	d735db6113	rm cppcheck (#34248 )	2024-12-14 23:17:45 -08:00
Adeeb Shihadeh	b6233838eb	macOS: disable brew auto update (#34247 )	2024-12-14 23:10:04 -08:00
Adeeb Shihadeh	ba0e7c4719	hardware: add helper for setting IR power (#34245 ) * hardware: add helper for setting IR power * fix	2024-12-14 13:50:25 -08:00
Maxime Desroches	70fa0ab4c1	debug: touch events plot (#34242 ) * replay * remove	2024-12-14 13:09:59 -08:00
Harald Schäfer	f6885dcbec	Revert Tinygrad (#34243 ) * Revert "dmonitoringmodeld: use cl transform (#34235)" This reverts commit `684b0b9d4d`. * Revert "load model before calling convert_fp16_to_fp32" This reverts commit `31606a7d15`. * Revert "bump tinygrad" This reverts commit `44f58ff758`. * Revert "Tinygrad runner (#34171)" This reverts commit `7b5a4fbb03`. * Allow init buffer * typo	2024-12-14 12:15:36 -08:00
Adeeb Shihadeh	4c27878f67	camerad: prep for the BPS (#34244 ) prep Co-authored-by: Comma Device <device@comma.ai>	2024-12-14 11:50:49 -08:00
ZwX1616	684b0b9d4d	dmonitoringmodeld: use cl transform (#34235 ) * needs cleanup * only if tici * bump tinygrad * check width * base modelframe * . * need to be args * more cleanup * no _frame in base * tici only * its DrivingModelFrame * .6 is fair --------- Co-authored-by: Comma Device <device@comma.ai>	2024-12-14 00:44:15 -08:00
Maxime Desroches	b3ad7ef24b	add touch events to qlogs (#34236 ) deci	2024-12-13 21:22:43 -08:00
Louis Velez	93a8d87b34	docs: glossary infra (#34231 ) * feat: glossary infra * fix static analysis error * fix ruff linter error. * updates docs.yaml to use ubuntu-24.04 * code review fixes	2024-12-13 20:28:01 -08:00
Harald Schäfer	8743bc4fe2	Null Pointer Model (#34111 ) * e8cb7f27-e448-4c15-90c2-ac440cd5a042/400 * 0078ad07-4d46-4086-820f-23d61c90e07f/400 * 4bd74082-70af-47da-8156-e84ebf4d4812/400 * 2a074022-5c2c-4628-97f9-f54849a936a6/400 * 0660aa81-93c5-41b7-9cc2-dc8816a512cd/400 * Clip curvature to reasonable limits * Better curvature and speed clips * typo * typo * 31aa62c3-b373-4878-8f2e-5107305de187/400 * 384690ca-9b8a-41fe-9bcd-389b20fc6aa4/400 * ref commit --------- Co-authored-by: Yassine <yassine.y10@gmail.com>	2024-12-13 19:14:21 -08:00
Maxime Desroches	e04ac10509	ci: fix cache key for test_models (#34230 ) * fix this * please rerun this my good ci friend * thank you very much	2024-12-13 17:20:28 -08:00
Maxime Desroches	64db514d41	hardwared: log touch events (#34225 ) * touch * touch * touch * touch * this * valid * better	2024-12-13 16:50:34 -08:00
Shane Smiskol	da2c70e097	Revert "LogReader: fix issue when your dns resolves all requests" (#34229 ) Revert "LogReader: fix issue when your dns resolves all requests (#34089)" This reverts commit `7fc5040ed9`.	2024-12-13 15:11:30 -08:00
Shane Smiskol	d574513879	bump opendbc (#34227 ) * bump * update docs	2024-12-13 13:33:37 -08:00
mitchellgoffpc	31606a7d15	load model before calling convert_fp16_to_fp32	2024-12-12 15:04:54 -08:00
mitchellgoffpc	44f58ff758	bump tinygrad	2024-12-12 14:12:45 -08:00
Shane Smiskol	cd6d9fee3f	Revert "athenad: fix thread safety issues in upload handing" (#34224 ) Revert "athenad: fix thread safety issues in upload handing (#34199)" This reverts commit `dcb3113c4b`.	2024-12-12 13:32:36 -08:00
Maxime Desroches	c1ae9eabf1	ci: skip simulator for external PRs (#34221 ) * try * try * try * try... * skip * cleanup	2024-12-12 11:17:50 -08:00
Harald Schäfer	7b5a4fbb03	Tinygrad runner (#34171 ) * squash * bump tg * bump tg * debump tinygrad * bump tinygrad * bump tg * Skip init iteration * fixes * cleanups * skip first test sample * typos * linter unhappy * update cpu usage * OPENCL just zeros for now * imports * Try printing * Runs again, but slower * unused import * Allow more buffer with tg and all on gpu * bump tinygrad * seems ok * stricter timings for driving looser for dm * try llvm * check nvidia * More timeout for now * make test pass * Revert "try llvm" This reverts commit ef136e478320101fea262bae3579e558da991902. * small fixes * whitespace * revert test timeout * No model runners * Always CPU always fast * No onnx runtime GPU * more cores * cleanup * Is this faster * Is this faster * at least runs * FP32 is faster than 16 * fix deps * whitespace * comment --------- Co-authored-by: Adeeb Shihadeh <adeebshihadeh@gmail.com>	2024-12-11 23:15:20 -08:00
Adeeb Shihadeh	0cf04af227	timed: gate time setting on GPS fix (#34217 )	2024-12-11 20:42:45 -08:00
Adeeb Shihadeh	7a2af78846	camerad: re-enable ISP debayer (#34212 ) camerad: re-enable ISP debayer Co-authored-by: Comma Device <device@comma.ai>	2024-12-11 20:04:40 -08:00
Adeeb Shihadeh	3328845be1	op/switch: fix ambiguous remote on checkout	2024-12-11 19:27:39 -08:00
Dean Lee	3a6db78601	camerad: pass std::vector by const reference (#34206 ) pass std::vector by const reference	2024-12-11 14:09:37 -08:00
Jason Young	7202c5acb8	Webcam fixes (#34211 ) * follow VIPC API change, add logging * use full path for video devices	2024-12-11 14:03:40 -08:00
Maxime Desroches	216ebcaa50	Fix model runtime on PC (#34210 ) exhaustive	2024-12-11 13:27:39 -08:00
Shane Smiskol	1dcdf57395	Toyota: raise max acceleration for TSS2 (#34201 ) * bump * Update ref_commit	2024-12-11 13:05:39 -08:00
DevTekVE	437663ff98	CI: Fix process replay to properly work with local routes (#469 ) Updating test_processes.py to easily upload ref routes locally to git instead of azure	2024-12-11 10:22:45 +01:00
Mike Busuttil	02976db472	Tools: simplified setup documentation (#34204 ) platform agnostic setup script	2024-12-10 21:29:27 -08:00
Jason Wen	03cd00719c	Tools: Update setup command for macOS native setup (#34202 )	2024-12-10 19:05:09 -08:00
Jason Wen	8b54fb8372	Revert "ci: Skip simulator failures" (#475 ) Revert "ci: Skip simulator failures (#470)" This reverts commit `705dd83a2f`.	2024-12-08 20:26:35 -05:00
Jason Wen	0f74e0f760	Sync: `commaai/openpilot:master` into `sunnypilot/openpilot:master-new` (#474 )	2024-12-08 20:12:29 -05:00
Jason Wen	4cccd07fd6	Merge branch 'upstream/master' into sync-20241208 # Conflicts: # .github/workflows/tools_tests.yaml # opendbc_repo	2024-12-08 19:40:20 -05:00
Jason Wen	b42c997f83	LFS: Sync refs from upstream (#472 ) * Adding script to pull from comma's LFS before pushing to ours * updating the script a little to allow pulling all when needed if needed * static .. * format * Fuck the simulator always failing * Apply suggestions from code review * Apply suggestions from code review --------- Co-authored-by: devtekve <devtekve@gmail.com>	2024-12-08 22:03:47 +01:00
Jason Wen	6aaa245e65	Revert "LFS: Sync refs from upstream" (#471 ) Revert "LFS: Sync refs from upstream (#467)" This reverts commit `8132fe9f0e`.	2024-12-08 15:41:04 -05:00
DevTekVE	705dd83a2f	ci: Skip simulator failures (#470 ) bai sim for now	2024-12-08 15:11:38 -05:00
DevTekVE	8132fe9f0e	LFS: Sync refs from upstream (#467 ) * Adding script to pull from comma's LFS before pushing to ours * updating the script a little to allow pulling all when needed if needed * static .. * format * Fuck the simulator always failing * Apply suggestions from code review --------- Co-authored-by: Jason Wen <haibin.wen3@gmail.com>	2024-12-08 14:39:35 -05:00
DevTekVE	5dc5b6accb	ci: Save cache for `master-new` (#468 ) Adding master-new to the whitelisted refs for caching stuff	2024-12-08 13:33:32 -05:00
Jason Wen	cc1bfcf12e	Sync: `commaai/openpilot:master` into `sunnypilot/sunnypilot:master-new` (#465 )	2024-12-05 18:12:23 -05:00
Jason Wen	bd6d207c8a	Merge branch 'upstream/master' into sync-20241205	2024-12-05 17:48:01 -05:00
Jason Wen	777ff8dcb4	Sync: `commaai/openpilot:master` into `sunnypilot/sunnypilot:master-new` (#459 )	2024-11-26 23:44:36 -05:00
Jason Wen	9446dd60d1	Merge branch 'upstream/master' into sync-20241126 # Conflicts: # opendbc_repo	2024-11-26 22:54:12 -05:00
Jason Wen	1deae82f12	Sync: `commaai/openpilot:master` into `sunnypilot/sunnypilot:master-new` (#458 )	2024-11-23 12:28:14 -05:00
Jason Wen	0649653947	Merge branch 'upstream/master' into sync-20241122	2024-11-23 10:26:18 -05:00
DevTekVE	ab65b19ba5	Hyundai: Enhanced Smart Cruise Control (ESCC) (#443 ) * initial updates to the actions to run for sp * ignore license file * more updates * undoing some of the changes because I was blocking the runs on * allowing the submodule check as well * Allowing macos builds * test adding cache key * don't attempt build_release for selfdrive for the time being. * Blocking macos builds as well since they have a 10x miltiplier for GH aciton minutes, waaaay too much! * lol nice typo codespell * change ref commit id to check if replay passes * Sync up submodules for ESCC * Remove ESCC base and interfaces classes Consolidated ESCC functionalities directly into the Hyundai ESCC module, removing redundant base and interface classes. This simplified the class structure and improved code maintainability. * Removing hints because they were causing a circular dependency * bump opendbc * bump opendbc * Bump opendbc * Fixing inconsistencies thanks to the tests! * Bumping opendbc after fixing tests * update opendbc * Updating to the new flags ordinal on cereal * Reverting some of the misc changes, clarifying naming and passing the scc12 and fca11 values to the methods as they are defined out of order * undoing more naming changes to simplify diff * More naming and tiny adjustments * use constant for msg id * bump opendbc * bump submodules * already added * bump opendbc * bump opendbc * bump opendbc * fix init * bump submodules * bump submodules * unit test! * rename * always use true radar tracks if available * update comment * bump submodules --------- Co-authored-by: Jason Wen <haibin.wen3@gmail.com>	2024-11-22 09:07:39 -05:00
DevTekVE	36f8192612	ci: sunnypilot UI GH Action fix ensure push to master-ui artifact (#453 ) Fixes the GitHub action that was failing when the pipeline was running as a result of a push to master-new because it was not properly recognized as "master"	2024-11-17 09:49:37 +01:00
DevTekVE	4b66cd0577	ci: fix repo permissions in ui preview (#450 ) * more fix * forcing a change on UI to test * undo ui change for test * temp add branch * adding more ways to set the master uo * more bruteforce * undoing force	2024-11-16 22:27:29 -05:00
DevTekVE	ad742515f1	ci: sunnypilot show UI diff on PRs (#449 ) add master-new as well to the triggers for the diff ui	2024-11-16 23:45:12 +01:00
Jason Wen	2426354d72	bump opendbc (#448 ) * bump opendbc * Bump opendbc to our changed car params * New Int16 test * spFlags * original name? * Testing more carparams combinations * Another crazy idea * typo * use this field * bump * bump opendbc --------- Co-authored-by: DevTekVE <devtekve@gmail.com>	2024-11-16 17:04:12 -05:00
DevTekVE	cf55992c26	CI: sunnypilot configuration (#447 ) * initial updates to the actions to run for sp * ignore license file * more updates * undoing some of the changes because I was blocking the runs on * allowing the submodule check as well * Allowing macos builds * test adding cache key * don't attempt build_release for selfdrive for the time being. * Blocking macos builds as well since they have a 10x miltiplier for GH aciton minutes, waaaay too much! * lol nice typo codespell * change ref commit id to check if replay passes	2024-11-16 10:24:47 -05:00
DevTekVE	3da0dfd47f	Updating submodules	2024-11-15 08:16:52 +01:00
Jason Wen	6edaf619bf	Add Custom MIT License (#438 )	2024-11-15 08:11:51 +01:00