Files
opendbc-meb/opendbc/safety/tests/mutation.py
Jason Wen c4f5a4f9d2 Merge branch 'upstream/opendbc/master' into sync-20260401
# Conflicts:
#	docs/CARS.md
#	opendbc/car/car_helpers.py
#	opendbc/car/chrysler/carcontroller.py
#	opendbc/car/chrysler/values.py
#	opendbc/car/honda/carcontroller.py
#	opendbc/car/honda/hondacan.py
#	opendbc/car/hyundai/hyundaicanfd.py
#	opendbc/car/rivian/interface.py
2026-04-02 21:05:06 -04:00

639 lines
23 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import io
import os
import re
import subprocess
import sys
import tempfile
import time
import unittest
from concurrent.futures import ProcessPoolExecutor, as_completed
from collections import Counter, namedtuple
from dataclasses import dataclass
from pathlib import Path
import tree_sitter_c as ts_c
import tree_sitter as ts
# Fourth ancestor of this file's resolved path; used as the working directory and include root for compiler invocations.
ROOT = Path(__file__).resolve().parents[3]
# Only mutation sites whose origin file lives under this directory are kept.
SAFETY_DIR = ROOT / "opendbc" / "safety"
# Directory that is globbed for test_*.py modules when the test catalog is built.
SAFETY_TESTS_DIR = ROOT / "opendbc" / "safety" / "tests"
# C translation unit (relative to ROOT) that is preprocessed and instrumented.
SAFETY_C_REL = Path("opendbc/safety/tests/libsafety/safety.c")
# ANSI escape sequences used by colorize().
ANSI_RESET = "\033[0m"
ANSI_BOLD = "\033[1m"
ANSI_RED = "\033[31m"
ANSI_GREEN = "\033[32m"
ANSI_YELLOW = "\033[33m"

# Each comparison operator maps to the operator it is replaced with.
COMPARISON_OPERATOR_MAP = {
    "==": "!=",
    "!=": "==",
    ">": "<=",
    ">=": "<",
    "<": ">=",
    "<=": ">",
}

# mutator name -> (tree-sitter node type, {original operator: mutated operator}).
# The "boundary" family has no operator table: its replacement is computed per literal.
MUTATOR_FAMILIES = {
    "increment": ("update_expression", {"++": "--"}),
    "decrement": ("update_expression", {"--": "++"}),
    "comparison": ("binary_expression", COMPARISON_OPERATOR_MAP),
    "boundary": ("number_literal", {}),
    "bitwise_assignment": (
        "assignment_expression",
        {"&=": "|=", "|=": "&=", "^=": "&="},
    ),
    "bitwise": (
        "binary_expression",
        {"&": "|", "|": "&", "^": "&"},
    ),
    "arithmetic_assignment": (
        "assignment_expression",
        {"+=": "-=", "-=": "+=", "*=": "/=", "/=": "*=", "%=": "*="},
    ),
    "arithmetic": (
        "binary_expression",
        {"+": "-", "-": "+", "*": "/", "/": "*", "%": "*"},
    ),
    "remove_negation": ("unary_expression", {"!": ""}),
}

# Intermediate record for a candidate site before it is mapped back to its origin file.
_RawSite = namedtuple(
    '_RawSite',
    [
        'expr_start', 'expr_end',
        'op_start', 'op_end',
        'line',
        'original_op', 'mutated_op',
        'mutator',
    ],
)
@dataclass(frozen=True)
class MutationSite:
    """A single candidate mutation, located in the preprocessed source and mapped to its origin."""

    # Identifier compared against __mutation_active_id in the instrumented C code.
    site_id: int
    # Start/end offsets of the whole expression that gets wrapped in the mutation ternary.
    expr_start: int
    expr_end: int
    # Start/end offsets of the operator (or literal) that is replaced.
    op_start: int
    op_end: int
    # 1-based line number in the preprocessed text.
    line: int
    # Text being replaced and its replacement.
    original_op: str
    mutated_op: str
    # Name of the mutator family that produced this site.
    mutator: str
    # File and line the site maps back to, derived from preprocessor line markers.
    origin_file: Path
    origin_line: int
@dataclass(frozen=True)
class MutantResult:
    """Outcome of evaluating one mutant."""

    # The mutation site that was evaluated.
    site: MutationSite
    # One of: killed | survived | infra_error
    outcome: str
    # Wall-clock seconds spent running tests for this mutant.
    test_sec: float
    # Free-form diagnostic text (empty when there is nothing to report).
    details: str
def colorize(text, color):
    """Wrap ``text`` in the ANSI ``color`` sequence when colored output is appropriate.

    Color is applied only if stdout is a TTY, ``TERM`` is set to something other
    than ``dumb``, and ``NO_COLOR`` is absent from the environment; otherwise
    ``text`` is returned unchanged.
    """
    terminal = os.environ.get("TERM", "")
    color_enabled = (
        sys.stdout.isatty()
        and terminal not in ("", "dumb")
        and "NO_COLOR" not in os.environ
    )
    if color_enabled:
        return f"{color}{text}{ANSI_RESET}"
    return text
def format_mutation(original_op, mutated_op):
    """Return ``original_op->mutated_op`` rendered in red (when color is enabled)."""
    arrow = f"{original_op}->{mutated_op}"
    return colorize(arrow, ANSI_RED)
def _parse_int_literal(token):
m = re.fullmatch(r"([0-9][0-9a-fA-FxX]*)([uUlL]*)", token)
if m is None:
return None
body, suffix = m.groups()
try:
value = int(body, 0)
except ValueError:
return None
base = "hex" if body.lower().startswith("0x") else "dec"
return value, base, suffix
def _site_key(site):
return (site.op_start, site.op_end, site.mutator)
def _is_in_constexpr_context(node):
"""Check if a node is inside a static or file-scope variable initializer."""
current = node.parent
while current is not None:
if current.type == "init_declarator":
decl = current.parent
if decl and decl.type == "declaration":
for child in decl.children:
if child.type == "storage_class_specifier" and child.text == b"static":
return True
if decl.parent and decl.parent.type == "translation_unit":
return True
current = current.parent
return False
def _prepare_for_parsing(txt):
"""Blank line markers and replace __typeof__() for tree-sitter. Preserves byte offsets."""
result = re.sub(
r'^[ \t]*#[ \t]+\d+[ \t]+"[^\n]*',
lambda m: " " * len(m.group()),
txt,
flags=re.MULTILINE,
)
# Replace __typeof__(...) with padded int (handle nested parens)
parts = []
i = 0
for m in re.finditer(r"(?:__typeof__|typeof)\s*\(", result):
if m.start() < i:
continue # skip nested typeof inside already-replaced region
parts.append(result[i:m.start()])
depth = 1
j = m.end()
while j < len(result) and depth > 0:
if result[j] == "(":
depth += 1
elif result[j] == ")":
depth -= 1
j += 1
parts.append("int" + " " * (j - m.start() - 3))
i = j
parts.append(result[i:])
return "".join(parts)
def enumerate_sites(input_source, preprocessed_file):
    """Preprocess ``input_source`` and enumerate every candidate mutation site.

    Runs ``cc -E`` with output written to ``preprocessed_file``, parses the
    result with tree-sitter, and collects operator and boundary candidates
    whose origin file lies under ``SAFETY_DIR``.

    Returns a 4-tuple ``(sites, counts, build_incompatible_site_ids, txt)``:
    the ordered list of ``MutationSite`` (each ``site_id`` equals its list
    index), a dict of kept-site counts per mutator name, the set of site ids
    located inside a static or file-scope initializer, and the preprocessed
    source text.

    Raises ``subprocess.CalledProcessError`` when the preprocessor fails.
    """
    preprocess_cmd = [
        "cc", "-E", "-std=gnu11", "-nostdlib", "-fno-builtin", "-DALLOW_DEBUG",
        f"-I{ROOT}", f"-I{ROOT / 'opendbc/safety/board'}",
        str(input_source), "-o", str(preprocessed_file),
    ]
    subprocess.run(preprocess_cmd, cwd=ROOT, capture_output=True, check=True)
    txt = preprocessed_file.read_text()

    # Map preprocessed line numbers to (origin file, origin line) by following
    # the `# <line> "<file>"` markers emitted by the preprocessor.
    line_map = {}
    marker_file = None
    marker_line = None
    directive_re = re.compile(r'^\s*#\s*(\d+)\s+"([^"]+)"')
    for pp_line_num, pp_line in enumerate(txt.splitlines(keepends=True), start=1):
        marker = directive_re.match(pp_line)
        if marker:
            marker_line = int(marker.group(1))
            marker_file = Path(marker.group(2)).resolve()
        elif marker_file is not None and marker_line is not None:
            line_map[pp_line_num] = (marker_file, marker_line)
            marker_line += 1

    # Parse the offset-preserving, sanitized text with tree-sitter.
    c_parser = ts.Parser(ts.Language(ts_c.language()))
    tree = c_parser.parse(_prepare_for_parsing(txt).encode())

    # (node type, operator text) -> list of (mutator, original_op, mutated_op).
    rule_map = {}
    counts = {}
    for mutator, (node_kind, op_map) in MUTATOR_FAMILIES.items():
        counts[mutator] = 0
        if mutator != "boundary":
            for original_op, mutated_op in op_map.items():
                rule_map.setdefault((node_kind, original_op), []).append((mutator, original_op, mutated_op))

    # Iterative walk over the syntax tree collecting raw sites, keyed for deduplication.
    deduped = {}
    build_incompatible_keys = set()
    pending = [tree.root_node]
    while pending:
        node = pending.pop()
        kind = node.type
        operator = node.child_by_field_name("operator")

        # Boundary mutations: bump every integer literal found inside the
        # operands of a comparison.
        if kind == "binary_expression" and operator and operator.type in COMPARISON_OPERATOR_MAP:
            operands = [node.child_by_field_name(field) for field in ("left", "right")]
            lit_stack = [operand for operand in operands if operand]
            while lit_stack:
                candidate = lit_stack.pop()
                parsed = None
                if candidate.type == "number_literal":
                    token = txt[candidate.start_byte:candidate.end_byte]
                    parsed = _parse_int_literal(token)
                if parsed:
                    value, base, suffix = parsed
                    bumped = value + 1
                    mutated = f"0x{bumped:X}{suffix}" if base == "hex" else f"{bumped}{suffix}"
                    bsite = _RawSite(
                        candidate.start_byte, candidate.end_byte,
                        candidate.start_byte, candidate.end_byte,
                        candidate.start_point[0] + 1, token, mutated, "boundary",
                    )
                    bkey = _site_key(bsite)
                    deduped[bkey] = bsite
                    if _is_in_constexpr_context(candidate):
                        build_incompatible_keys.add(bkey)
                lit_stack.extend(candidate.children)

        # Operator mutations: any node that exposes an operator child with a matching rule.
        if operator:
            for mutator, original_op, mutated_op in rule_map.get((kind, operator.type), []):
                raw = _RawSite(
                    node.start_byte, node.end_byte,
                    operator.start_byte, operator.end_byte,
                    node.start_point[0] + 1, original_op, mutated_op, mutator,
                )
                raw_key = _site_key(raw)
                deduped[raw_key] = raw
                if _is_in_constexpr_context(node):
                    build_incompatible_keys.add(raw_key)

        pending.extend(node.children)

    # Assign ids in source order, keeping only sites that map into SAFETY_DIR.
    out = []
    build_incompatible_site_ids = set()
    for raw in sorted(deduped.values(), key=lambda s: (s.op_start, s.mutator)):
        mapped = line_map.get(raw.line)
        if mapped is None:
            continue
        origin_file, origin_line = mapped
        if SAFETY_DIR not in origin_file.parents and origin_file != SAFETY_DIR:
            continue

        site_id = len(out)
        if _site_key(raw) in build_incompatible_keys:
            build_incompatible_site_ids.add(site_id)
        out.append(MutationSite(
            site_id=site_id, expr_start=raw.expr_start, expr_end=raw.expr_end,
            op_start=raw.op_start, op_end=raw.op_end, line=raw.line,
            original_op=raw.original_op, mutated_op=raw.mutated_op, mutator=raw.mutator,
            origin_file=origin_file, origin_line=origin_line,
        ))
        counts[raw.mutator] += 1

    return out, counts, build_incompatible_site_ids, txt
def _build_core_tests(catalog):
"""Build test ordering for core (non-mode) files.
One test per unique method name from evenly-spaced modules,
ordered by how widely each method is shared. Methods inherited by many
classes exercise the most fundamental safety logic and run first.
"""
MAX_PER_METHOD = 5
method_freq = {}
method_by_module = {}
for name in sorted(catalog.keys()):
for test_id in catalog[name]:
method = test_id.rsplit(".", 1)[-1]
method_freq[method] = method_freq.get(method, 0) + 1
if method not in method_by_module:
method_by_module[method] = {}
if name not in method_by_module[method]:
method_by_module[method][name] = test_id
# Pick evenly-spaced modules for each method to maximize configuration diversity
method_ids = {}
for method, module_map in method_by_module.items():
modules = sorted(module_map.keys())
n = len(modules)
if n <= MAX_PER_METHOD:
method_ids[method] = [module_map[m] for m in modules]
else:
step = n / MAX_PER_METHOD
method_ids[method] = [module_map[modules[int(i * step)]] for i in range(MAX_PER_METHOD)]
# Round-robin: first instance of each method (by freq), then second, etc.
# This ensures diverse early coverage with failfast.
sorted_methods = sorted(method_freq, key=lambda m: -method_freq[m])
ordered = []
for round_idx in range(MAX_PER_METHOD):
for m in sorted_methods:
ids = method_ids.get(m, [])
if round_idx < len(ids):
ordered.append(ids[round_idx])
return ordered
def build_priority_tests(site, catalog, core_tests):
    """Return the ordered list of test ids to run against a mutation site.

    Sites whose origin file is under ``opendbc/safety/modes`` get every test of
    the matching ``test_<mode>.py`` module (empty if there is none). All other
    sites get the pre-computed ``core_tests`` list itself.
    """
    origin = site.origin_file
    parts = origin.relative_to(ROOT).parts
    in_modes_dir = len(parts) >= 4 and parts[:3] == ("opendbc", "safety", "modes")
    if not in_modes_dir:
        return core_tests
    return list(catalog.get(f"test_{origin.stem}.py", []))
def format_site_snippet(site, context_lines=2):
    """Render the origin line of ``site`` with surrounding context.

    The mutated line is prefixed with ``>`` and the first occurrence of the
    original operator on it is replaced by a ``[[orig->mutated]]`` marker; if
    the operator text is not found, the marker is inserted at column 0.
    """
    source_lines = site.origin_file.read_text().splitlines()
    target = site.origin_line - 1
    first = max(0, target - context_lines)
    stop = min(len(source_lines), target + context_lines + 1)

    operator = site.original_op
    op_at = max(source_lines[target].find(operator), 0)
    op_stop = op_at + len(operator)

    # Right-align line numbers to the width of the largest one shown.
    width = len(str(stop))
    rendered = []
    for idx in range(first, stop):
        content = source_lines[idx]
        if idx == target:
            gutter = ">"
            marker = colorize(f"[[{site.original_op}->{site.mutated_op}]]", ANSI_RED)
            content = f"{content[:op_at]}{marker}{content[op_stop:]}"
        else:
            gutter = " "
        rendered.append(f"{gutter} {idx + 1:>{width}} | {content}")
    return "\n".join(rendered)
def render_progress(completed, total, killed, survived, infra, elapsed_sec):
    """Format a one-line progress report.

    The line contains a 30-character bar, ``completed/total``, the colored
    killed (``k:``), survived (``s:``) and infra-error (``i:``) counters, the
    completion rate in mutants per second (``mps``), the elapsed time, and an
    ETA extrapolated from the current rate.

    ``total == 0`` is rendered as a full bar instead of raising
    ``ZeroDivisionError``.
    """
    bar_width = 30
    # With nothing to run there is nothing left to do: show the bar as complete.
    fraction = completed / total if total > 0 else 1.0
    filled = int(fraction * bar_width)
    bar = "#" * filled + "-" * (bar_width - filled)

    rate = completed / elapsed_sec if elapsed_sec > 0 else 0.0
    remaining = total - completed
    # No ETA can be estimated until at least one mutant has completed.
    eta = (remaining / rate) if rate > 0 else 0.0

    killed_text = colorize(f"k:{killed}", ANSI_GREEN)
    survived_text = colorize(f"s:{survived}", ANSI_RED)
    infra_text = colorize(f"i:{infra}", ANSI_YELLOW)
    return f"[{bar}] {completed}/{total} {killed_text} {survived_text} {infra_text} mps:{rate:.2f} elapsed:{elapsed_sec:.1f}s eta:{eta:.1f}s"
def print_live_status(text, *, final=False):
    """Print a status line, redrawing in place when stdout is a TTY.

    On a TTY the line is prefixed with a carriage return and only terminated
    with a newline when ``final`` is true. Otherwise each call prints a normal
    line.
    """
    if not sys.stdout.isatty():
        print(text, flush=True)
        return
    line_end = "\n" if final else ""
    print("\r" + text, end=line_end, flush=True)
def _discover_test_catalog():
    """Load every ``test_*.py`` module in ``SAFETY_TESTS_DIR`` and index its tests.

    Returns a dict mapping each test file name to the list of test ids found in
    that module.
    """
    loader = unittest.TestLoader()
    catalog = {}
    for path in sorted(SAFETY_TESTS_DIR.glob("test_*.py")):
        # Convert the path relative to ROOT into a dotted module name.
        dotted = ".".join(path.relative_to(ROOT).with_suffix("").parts)
        test_ids = []
        for class_suite in loader.loadTestsFromName(dotted):
            test_ids.extend(case.id() for case in class_suite)
        catalog[path.name] = test_ids
    return catalog
def run_unittest(targets, lib_path, mutant_id, verbose):
    """Run the given unittest targets against the library with one mutant active.

    Loads the shared library at ``lib_path``, activates ``mutant_id``, and runs
    ``targets`` with failfast. Returns the id of the first failing (or, if
    there are no failures, first erroring) test, or ``None`` when everything
    passed.
    """
    from opendbc.safety.tests.libsafety import libsafety_py
    libsafety_py.load(lib_path)
    libsafety_py.libsafety.mutation_set_active_mutant(mutant_id)

    if verbose:
        print("Running unittest targets:", ", ".join(targets), flush=True)

    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    for target in targets:
        suite.addTests(loader.loadTestsFromName(target))

    # Runner output goes to an in-memory buffer that is discarded.
    runner = unittest.TextTestRunner(stream=io.StringIO(), verbosity=0, failfast=True)
    outcome = runner.run(suite)

    for bucket in (outcome.failures, outcome.errors):
        if bucket:
            failed_case, _traceback = bucket[0]
            return failed_case.id()
    return None
def _instrument_source(source, sites):
# Sort by start ascending, end descending (outermost first when same start)
sorted_sites = sorted(sites, key=lambda s: (s.expr_start, -s.expr_end))
# Build containment forest using a stack
roots = []
stack = []
for site in sorted_sites:
while stack and stack[-1][0].expr_end <= site.expr_start:
stack.pop()
node = [site, []]
if stack:
stack[-1][1].append(node)
else:
roots.append(node)
stack.append(node)
def build_replacement(site, children):
parts = []
pos = site.expr_start
op_rel = None
running_len = 0
for child_site, child_children in children:
seg = source[pos : child_site.expr_start]
if op_rel is None and site.op_start >= pos and site.op_start < child_site.expr_start:
op_rel = running_len + (site.op_start - pos)
parts.append(seg)
running_len += len(seg)
child_repl = build_replacement(child_site, child_children)
parts.append(child_repl)
running_len += len(child_repl)
pos = child_site.expr_end
seg = source[pos : site.expr_end]
if op_rel is None and site.op_start >= pos:
op_rel = running_len + (site.op_start - pos)
parts.append(seg)
expr_text = "".join(parts)
op_len = site.op_end - site.op_start
assert op_rel is not None and expr_text[op_rel : op_rel + op_len] == site.original_op, (
f"Operator mismatch (site_id={site.site_id}): expected {site.original_op!r} at offset {op_rel}"
)
mutated_expr = f"{expr_text[:op_rel]}{site.mutated_op}{expr_text[op_rel + op_len :]}"
return f"((__mutation_active_id == {site.site_id}) ? ({mutated_expr}) : ({expr_text}))"
result_parts = []
pos = 0
for site, children in roots:
result_parts.append(source[pos : site.expr_start])
result_parts.append(build_replacement(site, children))
pos = site.expr_end
result_parts.append(source[pos:])
return "".join(result_parts)
def compile_mutated_library(preprocessed_source, sites, output_so):
    """Instrument the preprocessed source and compile it into a shared library.

    The generated C file is written next to ``output_so`` (same name with a
    ``.c`` suffix). It starts with a prelude defining the active-mutant id and
    its setter/getter, followed by the instrumented source with preprocessor
    line markers removed.

    Raises ``subprocess.CalledProcessError`` when compilation fails.
    """
    prelude = """
static int __mutation_active_id = -1;
void mutation_set_active_mutant(int id) { __mutation_active_id = id; }
int mutation_get_active_mutant(void) { return __mutation_active_id; }
"""
    line_marker = re.compile(r'^\s*#\s+\d+\s+"[^\n]*\n?', re.MULTILINE)
    # Markers are stripped only after instrumentation, which relies on the original offsets.
    body = line_marker.sub("", _instrument_source(preprocessed_source, sites))

    c_path = output_so.with_suffix(".c")
    c_path.write_text(prelude + body)

    compile_cmd = [
        "cc", "-shared", "-fPIC", "-w", "-fno-builtin", "-std=gnu11",
        "-g0", "-O0", "-DALLOW_DEBUG",
        str(c_path), "-o", str(output_so),
    ]
    subprocess.run(compile_cmd, cwd=ROOT, check=True)
def eval_mutant(site, targets, lib_path, verbose):
    """Evaluate one mutant and classify the outcome.

    Returns a ``MutantResult`` with outcome ``"killed"`` when a test failed,
    ``"survived"`` when all tests passed, or ``"infra_error"`` (zero duration,
    exception text in ``details``) when running the tests raised.
    """
    try:
        began = time.perf_counter()
        failed_test = run_unittest(targets, lib_path, mutant_id=site.site_id, verbose=verbose)
        took = time.perf_counter() - began
    except Exception as exc:
        return MutantResult(site, "infra_error", 0.0, str(exc))

    outcome = "survived" if failed_test is None else "killed"
    return MutantResult(site, outcome, took, "")
def main():
    """Run the mutation campaign against the safety library and report the results.

    Enumerates mutation sites, builds one instrumented shared library, runs a
    baseline smoke check with no mutant active, then evaluates every mutant in
    a process pool and prints survivors, infra errors and a summary.

    Returns the process exit status:
      0 - list-only mode, or no survivors beyond the known-survivor list
      1 - at least one unexpected surviving mutant
      2 - every site was pruned, the baseline smoke check failed, or an infra error occurred
    """
    parser = argparse.ArgumentParser(description="Run strict safety mutation")
    parser.add_argument("-j", type=int, default=max((os.cpu_count() or 1) - 1, 1), help="parallel mutants to run")
    parser.add_argument("--max-mutants", type=int, default=0, help="optional limit for debugging (0 means all)")
    parser.add_argument("--list-only", action="store_true", help="list discovered candidates and exit")
    parser.add_argument("--verbose", action="store_true", help="print extra debug output")
    args = parser.parse_args()

    start = time.perf_counter()

    with tempfile.TemporaryDirectory(prefix="mutation-op-run-") as run_tmp_dir:
        preprocessed_file = Path(run_tmp_dir) / "safety_preprocessed.c"
        sites, mutator_counts, build_incompatible_ids, preprocessed_source = enumerate_sites(ROOT / SAFETY_C_REL, preprocessed_file)
        assert len(sites) > 0

        if args.max_mutants > 0:
            sites = sites[: args.max_mutants]
            # Recount per family so the summary below matches the truncated selection.
            mutator_counts = Counter(s.mutator for s in sites)

        mutator_summary = ", ".join(f"{name} ({c})" for name in MUTATOR_FAMILIES if (c := mutator_counts.get(name, 0)) > 0)
        print(f"Found {len(sites)} unique candidates: {mutator_summary}", flush=True)
        if args.list_only:
            for site in sites:
                mutation = format_mutation(site.original_op, site.mutated_op)
                print(f"  #{site.site_id:03d} {site.origin_file.relative_to(ROOT)}:{site.origin_line} [{site.mutator}] {mutation}")
            return 0

        print(f"Running {len(sites)} mutants with {args.j} workers", flush=True)

        # Drop sites inside static/file-scope initializers: they are flagged as build-incompatible.
        discovered_count = len(sites)
        selected_site_ids = {s.site_id for s in sites}
        build_incompatible_ids &= selected_site_ids
        pruned_compile_sites = len(build_incompatible_ids)
        if pruned_compile_sites > 0:
            sites = [s for s in sites if s.site_id not in build_incompatible_ids]
            print(f"Pruned {pruned_compile_sites} build-incompatible mutants from constant-expression initializers", flush=True)
        if not sites:
            print("Failed to build mutation library: all sites were pruned as build-incompatible", flush=True)
            return 2

        mutation_lib = Path(run_tmp_dir) / "libsafety_mutation.so"
        compile_mutated_library(preprocessed_source, sites, mutation_lib)

        # Discover all tests by importing modules in the main process.
        # Forked workers inherit these imports, eliminating per-worker import cost.
        catalog = _discover_test_catalog()

        # Baseline smoke check: with no mutant active, the instrumented library must pass.
        baseline_ids = catalog.get("test_defaults.py", [])[:5]
        baseline_failed = run_unittest(baseline_ids, mutation_lib, mutant_id=-1, verbose=args.verbose)
        if baseline_failed is not None:
            print("Baseline smoke failed with mutant_id=-1; aborting to avoid false kill signals.", flush=True)
            print(f"  failed_test: {baseline_failed}", flush=True)
            return 2

        # Pre-compute test targets per mutation site
        core_tests = _build_core_tests(catalog)
        site_targets = {site.site_id: build_priority_tests(site, catalog, core_tests) for site in sites}

        results = []
        counts = Counter()

        with ProcessPoolExecutor(max_workers=args.j) as pool:
            future_map = {
                pool.submit(eval_mutant, site, site_targets[site.site_id], mutation_lib, args.verbose): site for site in sites
            }
            print_live_status(render_progress(0, len(sites), 0, 0, 0, 0.0))
            try:
                for fut in as_completed(future_map):
                    try:
                        res = fut.result()
                    except Exception:
                        # A crashed worker is counted as a kill.
                        site = future_map[fut]
                        res = MutantResult(site, "killed", 0.0, "worker process crashed")
                    results.append(res)
                    counts[res.outcome] += 1
                    elapsed_now = time.perf_counter() - start
                    done = len(results) == len(sites)
                    print_live_status(render_progress(len(results), len(sites), counts["killed"], counts["survived"],
                                                      counts["infra_error"], elapsed_now), final=done)
            except Exception:
                # Pool broken — mark all unfinished mutants as killed (crash = behavioral change detected)
                completed_ids = {r.site.site_id for r in results}
                for site in sites:
                    if site.site_id not in completed_ids:
                        results.append(MutantResult(site, "killed", 0.0, "pool broken"))
                        counts["killed"] += 1
                elapsed_now = time.perf_counter() - start
                print_live_status(render_progress(len(results), len(sites), counts["killed"], counts["survived"], counts["infra_error"], elapsed_now), final=True)

        survivors = sorted((r for r in results if r.outcome == "survived"), key=lambda r: r.site.site_id)
        if survivors:
            print("", flush=True)
            print(colorize("Surviving mutants", ANSI_RED), flush=True)
            for res in survivors:
                loc = f"{res.site.origin_file.relative_to(ROOT)}:{res.site.origin_line}"
                mutation = format_mutation(res.site.original_op, res.site.mutated_op)
                print(f"- #{res.site.site_id} {loc} [{res.site.mutator}] {mutation}", flush=True)
                print(format_site_snippet(res.site), flush=True)

        infra_results = sorted((r for r in results if r.outcome == "infra_error"), key=lambda r: r.site.site_id)
        if infra_results:
            print("", flush=True)
            print(colorize("Infra errors", ANSI_YELLOW), flush=True)
            for res in infra_results:
                loc = f"{res.site.origin_file.relative_to(ROOT)}:{res.site.origin_line}"
                detail = res.details.splitlines()[0] if res.details else "unknown error"
                print(f"- #{res.site.site_id} {loc}: {detail}", flush=True)

        elapsed = time.perf_counter() - start
        total_test_sec = sum(r.test_sec for r in results)
        print("", flush=True)
        print(colorize("Mutation summary", ANSI_BOLD), flush=True)
        print(f"  discovered: {discovered_count}", flush=True)
        print(f"  pruned_build_incompatible: {pruned_compile_sites}", flush=True)
        print(f"  total: {len(sites)}", flush=True)
        print(f"  killed: {colorize(str(counts['killed']), ANSI_GREEN)}", flush=True)
        print(f"  survived: {colorize(str(counts['survived']), ANSI_RED)}", flush=True)
        print(f"  infra_error: {colorize(str(counts['infra_error']), ANSI_YELLOW)}", flush=True)
        print(f"  test_time_sum: {total_test_sec:.2f}s", flush=True)
        print(f"  avg_test_per_mutant: {total_test_sec / len(results):.3f}s", flush=True)
        print(f"  mutants_per_second: {len(sites) / elapsed:.2f}", flush=True)
        print(f"  elapsed: {elapsed:.2f}s", flush=True)

        if counts["infra_error"] > 0:
            return 2

        # TODO: fix these surviving mutants and delete this block
        known_survivors = {
            ("opendbc/safety/helpers.h", 40, "arithmetic"),
            ("opendbc/safety/lateral.h", 110, "boundary"),
            ("opendbc/safety/lateral.h", 200, "boundary"),
            ("opendbc/safety/lateral.h", 244, "boundary"),
            ("opendbc/safety/lateral.h", 342, "arithmetic"),
            ("opendbc/safety/sunnypilot/mads.h", 66, "comparison"),
            ("opendbc/safety/sunnypilot/mads.h", 149, "boundary"),
            ("opendbc/safety/sunnypilot/mads.h", 150, "boundary"),
            ("opendbc/safety/sunnypilot/mads.h", 151, "boundary"),
            ("opendbc/safety/longitudinal.h", 38, "remove_negation"),
        }
        # Only survivors outside the known list fail the run.
        survivors = [r for r in survivors if (str(r.site.origin_file.relative_to(ROOT)), r.site.origin_line, r.site.mutator) not in known_survivors]

        if survivors:
            return 1
        return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())