Process replay: add diff report (#37048)

* rm upload

* use ci-artifacts

* sanitize

* rm ref_commit

* add ci

* handle exept

* bootstrap

* always

* fix

* replay

* keep ref_commit fork compatibility

* remove upload-only

* apply comments

* safe diffs in master

* Revert "safe diffs in master"

This reverts commit 369fccac786a67799193e9152488813c6df20414.

* continue on master diff

* imports

* copy formatting from car_diff

* main

* setup refs and cur

* copy diff

* copy formatting

* comment

* rm token

* rm hash

* continue on master diff

* use ci-artifacts refs

* add run card diff

* checkout

* shebang

* card_diff.yml

* rm ci-artifacts

* apply ci-artifacts

* call differ

* rename

* uv lock

* tests

* readme

* checkout

* add all configs

* import base_url

* rename yaml

* integrate in test_processes

* fix diff report

* var names

* extract to module

* print report

* add msg count to diff

* traceback

* diff format

* typing

* name step

* allow NaN

* replace join
This commit is contained in:
Daniel Koepping
2026-03-23 09:41:52 -07:00
committed by GitHub
parent 5766202763
commit f4b8384332
4 changed files with 165 additions and 6 deletions

45
.github/workflows/diff_report.yaml vendored Normal file
View File

@@ -0,0 +1,45 @@
name: diff report
on:
pull_request_target:
types: [opened, synchronize, reopened]
jobs:
comment:
name: comment
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read
pull-requests: write
actions: read
steps:
- name: Wait for process replay
id: wait
continue-on-error: true
uses: lewagon/wait-on-check-action@v1.3.4
with:
ref: ${{ github.event.pull_request.head.sha }}
check-name: process replay
repo-token: ${{ secrets.GITHUB_TOKEN }}
allowed-conclusions: success,failure
wait-interval: 20
- name: Download diff
if: steps.wait.outcome == 'success'
uses: dawidd6/action-download-artifact@v6
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
workflow: tests.yaml
workflow_conclusion: ''
pr: ${{ github.event.number }}
name: diff_report_${{ github.event.number }}
path: .
allow_forks: true
- name: Comment on PR
if: steps.wait.outcome == 'success'
uses: thollander/actions-comment-pull-request@v2
with:
filePath: diff_report.txt
comment_tag: diff_report
pr_number: ${{ github.event.number }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -139,12 +139,22 @@ jobs:
id: print-diff
if: always()
run: cat selfdrive/test/process_replay/diff.txt
- name: Print diff report
if: always()
run: cat selfdrive/test/process_replay/diff_report.txt
- uses: actions/upload-artifact@v6
if: always()
continue-on-error: true
with:
name: process_replay_diff.txt
path: selfdrive/test/process_replay/diff.txt
- name: Upload diff report
uses: actions/upload-artifact@v6
if: always() && github.event_name == 'pull_request'
continue-on-error: true
with:
name: diff_report_${{ github.event.number }}
path: selfdrive/test/process_replay/diff_report.txt
- name: Checkout ci-artifacts
if: github.repository == 'commaai/openpilot' && github.ref == 'refs/heads/master'
uses: actions/checkout@v4

View File

@@ -0,0 +1,92 @@
import os
from collections import defaultdict
from opendbc.car.tests.car_diff import format_diff, format_numeric_diffs
from openpilot.selfdrive.test.process_replay.compare_logs import compare_logs
from openpilot.selfdrive.test.process_replay.process_replay import PROC_REPLAY_DIR
class MsgWrap:
"""Adapter so to_dict() includes defaults"""
def __init__(self, msg):
self._msg = msg
def to_dict(self) -> dict:
return self._msg.to_dict(verbose=True)
def diff_process(cfg, ref_msgs, new_msgs) -> tuple | None:
ref = defaultdict(list)
new = defaultdict(list)
for m in ref_msgs:
if m.which() in cfg.subs:
ref[m.which()].append(m)
for m in new_msgs:
if m.which() in cfg.subs:
new[m.which()].append(m)
diffs = []
for sub in cfg.subs:
if len(ref[sub]) != len(new[sub]):
diffs.append((f"{sub} (message count)", 0, (len(ref[sub]), len(new[sub])), 0))
for i, (r, n) in enumerate(zip(ref[sub], new[sub], strict=False)):
for d in compare_logs([r], [n], cfg.ignore, tolerance=cfg.tolerance):
if d[0] == "change":
a, b = d[2]
if a != a and b != b:
continue
diffs.append((d[1], i, d[2], r.logMonoTime))
elif d[0] in ("add", "remove"):
for item in d[2]:
if item[1] != item[1]:
continue
diffs.append((f"{d[1]}.{item[0]}", i, (d[0], item[1]), r.logMonoTime))
return (diffs, ref, new) if diffs else None
def diff_format(diffs, ref, new, field) -> list[str]:
if any(part.isdigit() for part in field.split(".")):
return format_numeric_diffs(diffs)
msg_type = field.split(".")[0]
ref_ts = [(m.logMonoTime, MsgWrap(m)) for m in ref.get(msg_type, [])]
new_wrapped = [MsgWrap(m) for m in new.get(msg_type, [])]
return format_diff(diffs, ref_ts, new_wrapped, field)
def diff_report(replay_diffs, segments) -> None:
seg_to_plat = {seg: plat for plat, seg in segments}
with_diffs, errors, n_passed = [], [], 0
for seg, proc, data in replay_diffs:
plat = seg_to_plat.get(seg, "UNKNOWN")
if data is None:
n_passed += 1
elif isinstance(data, str):
errors.append((plat, seg, proc, data))
else:
with_diffs.append((plat, seg, proc, data))
icon = "⚠️" if with_diffs else ""
lines = [
"## Process replay diff report",
"Replays driving segments through this PR and compares the behavior to master.",
"Please review any changes carefully to ensure they are expected.\n",
f"{icon} {len(with_diffs)} changed, {n_passed} passed, {len(errors)} errors",
]
for plat, seg, proc, err in errors:
lines.append(f"\nERROR {plat} - {seg} [{proc}]: {err}")
if with_diffs:
lines.append("<details><summary><b>Show changes</b></summary>\n\n```")
for plat, seg, proc, (diffs, ref, new) in with_diffs:
lines.append(f"\n{plat} - {seg} [{proc}]")
by_field = defaultdict(list)
for d in diffs:
by_field[d[0]].append(d)
for field, fd in sorted(by_field.items()):
lines.append(f"\n {field} ({len(fd)} diffs)")
lines.extend(diff_format(fd, ref, new, field))
lines.append("```\n</details>")
with open(os.path.join(PROC_REPLAY_DIR, "diff_report.txt"), "w") as f:
f.write("\n".join(lines))

View File

@@ -3,6 +3,7 @@ import argparse
import concurrent.futures
import os
import sys
import traceback
from collections import defaultdict
from tqdm import tqdm
from typing import Any
@@ -11,6 +12,7 @@ from opendbc.car.car_helpers import interface_names
from openpilot.common.git import get_commit
from openpilot.tools.lib.openpilotci import get_url
from openpilot.selfdrive.test.process_replay.compare_logs import compare_logs, format_diff
from openpilot.selfdrive.test.process_replay.diff_report import diff_process, diff_report
from openpilot.selfdrive.test.process_replay.process_replay import CONFIGS, PROC_REPLAY_DIR, FAKEDATA, replay_process, \
check_most_messages_valid
from openpilot.tools.lib.filereader import FileReader
@@ -72,11 +74,16 @@ EXCLUDED_PROCS = {"modeld", "dmonitoringmodeld"}
def run_test_process(data):
segment, cfg, args, cur_log_fn, ref_log_path, lr_dat = data
ref_log_msgs = list(LogReader(ref_log_path))
lr = LogReader.from_bytes(lr_dat)
res, log_msgs = test_process(cfg, lr, segment, ref_log_path, cur_log_fn, args.ignore_fields, args.ignore_msgs)
res, log_msgs = test_process(cfg, lr, segment, ref_log_msgs, cur_log_fn, args.ignore_fields, args.ignore_msgs)
# save logs so we can update refs
save_log(cur_log_fn, log_msgs)
return (segment, cfg.proc_name, res)
try:
diff_data = diff_process(cfg, ref_log_msgs, log_msgs)
except Exception:
diff_data = traceback.format_exc()
return (segment, cfg.proc_name, res, diff_data)
def get_log_data(segment):
@@ -85,14 +92,12 @@ def get_log_data(segment):
return (segment, f.read())
def test_process(cfg, lr, segment, ref_log_path, new_log_path, ignore_fields=None, ignore_msgs=None):
def test_process(cfg, lr, segment, ref_log_msgs, new_log_path, ignore_fields=None, ignore_msgs=None):
if ignore_fields is None:
ignore_fields = []
if ignore_msgs is None:
ignore_msgs = []
ref_log_msgs = list(LogReader(ref_log_path))
try:
log_msgs = replay_process(cfg, lr, disable_progress=True)
except Exception as e:
@@ -201,9 +206,11 @@ if __name__ == "__main__":
log_paths[segment][cfg.proc_name]['new'] = cur_log_fn
results: Any = defaultdict(dict)
diffs: list = []
p2 = pool.map(run_test_process, pool_args)
for (segment, proc, result) in tqdm(p2, desc="Running Tests", total=len(pool_args)):
for (segment, proc, result, diff_data) in tqdm(p2, desc="Running Tests", total=len(pool_args)):
results[segment][proc] = result
diffs.append((segment, proc, diff_data))
diff_short, diff_long, failed = format_diff(results, log_paths, ref_commit)
if not args.update_refs:
@@ -211,6 +218,11 @@ if __name__ == "__main__":
f.write(diff_long)
print(diff_short)
try:
diff_report(diffs, segments)
except Exception:
print(f"failed to generate diff report:\n{traceback.format_exc()}")
if failed:
print("TEST FAILED")
else: