From 5ad2f95d01784cb76bcd0579d2da51a706b7170b Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:19:56 +0800 Subject: [PATCH] process replay diff stats (#6736) * process replay diff stats * fix tuples --- test/external/process_replay/diff_schedule.py | 2 +- .../external/process_replay/process_replay.py | 23 +++++++++++-------- test/external/process_replay/reset.py | 2 +- .../process_replay/test_process_replay.py | 4 ++-- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/test/external/process_replay/diff_schedule.py b/test/external/process_replay/diff_schedule.py index a432196b33..791df044ab 100644 --- a/test/external/process_replay/diff_schedule.py +++ b/test/external/process_replay/diff_schedule.py @@ -39,7 +39,7 @@ def diff_schedule(s:List[Tuple[DefaultDict[LBScheduleItem, List[LBScheduleItem]] if (cache_key:=ref.ast.key+compare.ast.key) in seen_diffs: continue seen_diffs.add(cache_key) changed += 1 - if CAPTURING_PROCESS_REPLAY: diskcache_put("schedule_diff", str(uuid.uuid4()), (str(buf), [ref.ast.key, compare.ast.key])) + if CAPTURING_PROCESS_REPLAY: diskcache_put("schedule_diff", str(uuid.uuid4()), (str(buf), [ref.ast, compare.ast])) if not CI: print_si_diff(ref, compare) if DEBUG >= 1: print(f"*** process replay: {changed} unique kernel{'s' if changed>1 else ''} changed") return changed diff --git a/test/external/process_replay/process_replay.py b/test/external/process_replay/process_replay.py index 2bcb724a01..1799cdf434 100755 --- a/test/external/process_replay/process_replay.py +++ b/test/external/process_replay/process_replay.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # compare kernels created by HEAD against master -import os, multiprocessing, logging, pickle, sqlite3 -from typing import Callable, List, cast +import os, multiprocessing, logging, pickle, sqlite3, difflib +from typing import Callable, List, Tuple, Union, cast from tinygrad.helpers import VERSION, Context, ContextVar, db_connection, getenv, tqdm from tinygrad.codegen.kernel import Kernel from test.external.process_replay.helpers import print_diff @@ -39,12 +39,12 @@ def diff_schedule(offset:int) -> bool: else: print_diff(asts[0], asts[1]) return bool(changed) -def diff_kernel(offset:int) -> bool: +def diff_kernel(offset:int) -> Union[Tuple[int, int], bool]: if early_stop.is_set(): return True conn = db_connection() cur = conn.cursor() cur.execute(f"SELECT val FROM 'kernel_{TABLE_NAME}' LIMIT ? OFFSET ?", (PAGE_SIZE, offset)) - changed = 0 + additions, deletions, changed = 0, 0, 0 for row in cur.fetchall(): # try unpickle try: ast, opts, applied_opts, name, compare_src, ctx = pickle.loads(row[0]) @@ -68,30 +68,35 @@ def diff_kernel(offset:int) -> bool: # diff kernels try: assert compare_src == good_src except AssertionError: - changed += 1 logging.info("PROCESS REPLAY DETECTED CHANGE") logging.info(ast) logging.info(applied_opts) logging.info(ctx.loc) print_diff(good_src, compare_src) - if ASSERT_DIFF: return True + changes = list(difflib.unified_diff(str(good_src).splitlines(), str(compare_src).splitlines())) + additions += len([x for x in changes if x.startswith("+")]) + deletions += len([x for x in changes if x.startswith("-")]) + if ASSERT_DIFF: return additions, deletions if changed > MAX_DIFF_PCT: logging.warning(f"detected changes in over {MAX_DIFF_PCT}% of kernels. skipping further diff generation.") early_stop.set() break conn.commit() cur.close() - return bool(changed) + return additions, deletions # *** generic runner for executing fxn across all rows of a table in parallel -def _pmap(row_count:int, fxn:Callable[[int], bool], maxtasksperchild:int=16) -> None: +def _pmap(row_count:int, fxn:Callable[[int], Union[bool, Tuple[int, int]]], maxtasksperchild:int=16) -> None: with multiprocessing.get_context("spawn").Pool(multiprocessing.cpu_count(), maxtasksperchild=maxtasksperchild) as pool: inputs = list(range(0, row_count, PAGE_SIZE)) - changed: List[bool] = list(tqdm(pool.imap_unordered(fxn, inputs), total=len(inputs))) + ret: List[Union[bool, Tuple[int, int]]] = list(tqdm(pool.imap_unordered(fxn, inputs), total=len(inputs))) pool.close() pool.join() pool.terminate() + changed = [bool(x[0] or x[1]) if isinstance(x, tuple) else x for x in ret] + insertion, deletions = [x[0] for x in ret if isinstance(x, tuple)], [x[1] for x in ret if isinstance(x, tuple)] + logging.info(f"{sum(changed)} kernels changed{f', {sum(insertion)} insertions(+), {sum(deletions)} deletions(-)' if len(insertion) != 0 else ''}") if any(changed) and ASSERT_DIFF: raise AssertionError("process replay detected changes") # *** process replay parallel differ runners diff --git a/test/external/process_replay/reset.py b/test/external/process_replay/reset.py index 9f1913bd5d..8d28194df2 100755 --- a/test/external/process_replay/reset.py +++ b/test/external/process_replay/reset.py @@ -2,6 +2,6 @@ from tinygrad.helpers import db_connection, VERSION, os cur = db_connection() cur.execute(f"drop table if exists kernel_process_replay_{VERSION}") -cur.execute(f"drop table if exists schedule_process_replay_{VERSION}") +cur.execute(f"drop table if exists schedule_diff_{VERSION}") if os.path.exists(fp:=__file__.replace("reset", "master_schedule")): os.system(f"rm -rf {fp}") diff --git a/test/external/process_replay/test_process_replay.py b/test/external/process_replay/test_process_replay.py index 2490289981..0f26ac941c 100644 --- a/test/external/process_replay/test_process_replay.py +++ b/test/external/process_replay/test_process_replay.py @@ -36,14 +36,14 @@ void test(int* restrict a, const int* restrict b) { } """ offset = helper_append_replay(ast, "test", test_src) - assert diff_kernel(offset-1) + assert diff_kernel(offset-1) == (5, 4) def test_identical_run(self): out = Tensor([1, 2, 3])+1 ast = out.schedule()[-1].ast test_prg = Kernel(ast, ClangRenderer()).to_program() offset = helper_append_replay(ast, test_prg.name, test_prg.src) - assert not diff_kernel(offset) + assert diff_kernel(offset) == (0, 0) if __name__ == "__main__": unittest.main()