process replay diff stats (#6736)

* process replay diff stats

* fix tuples
This commit is contained in:
qazal
2024-09-25 15:19:56 +08:00
committed by GitHub
parent 56979aa3ed
commit 5ad2f95d01
4 changed files with 18 additions and 13 deletions

View File

@@ -39,7 +39,7 @@ def diff_schedule(s:List[Tuple[DefaultDict[LBScheduleItem, List[LBScheduleItem]]
if (cache_key:=ref.ast.key+compare.ast.key) in seen_diffs: continue
seen_diffs.add(cache_key)
changed += 1
if CAPTURING_PROCESS_REPLAY: diskcache_put("schedule_diff", str(uuid.uuid4()), (str(buf), [ref.ast.key, compare.ast.key]))
if CAPTURING_PROCESS_REPLAY: diskcache_put("schedule_diff", str(uuid.uuid4()), (str(buf), [ref.ast, compare.ast]))
if not CI: print_si_diff(ref, compare)
if DEBUG >= 1: print(f"*** process replay: {changed} unique kernel{'s' if changed>1 else ''} changed")
return changed

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# compare kernels created by HEAD against master
import os, multiprocessing, logging, pickle, sqlite3
from typing import Callable, List, cast
import os, multiprocessing, logging, pickle, sqlite3, difflib
from typing import Callable, List, Tuple, Union, cast
from tinygrad.helpers import VERSION, Context, ContextVar, db_connection, getenv, tqdm
from tinygrad.codegen.kernel import Kernel
from test.external.process_replay.helpers import print_diff
@@ -39,12 +39,12 @@ def diff_schedule(offset:int) -> bool:
else: print_diff(asts[0], asts[1])
return bool(changed)
def diff_kernel(offset:int) -> bool:
def diff_kernel(offset:int) -> Union[Tuple[int, int], bool]:
if early_stop.is_set(): return True
conn = db_connection()
cur = conn.cursor()
cur.execute(f"SELECT val FROM 'kernel_{TABLE_NAME}' LIMIT ? OFFSET ?", (PAGE_SIZE, offset))
changed = 0
additions, deletions, changed = 0, 0, 0
for row in cur.fetchall():
# try unpickle
try: ast, opts, applied_opts, name, compare_src, ctx = pickle.loads(row[0])
@@ -68,30 +68,35 @@ def diff_kernel(offset:int) -> bool:
# diff kernels
try: assert compare_src == good_src
except AssertionError:
changed += 1
logging.info("PROCESS REPLAY DETECTED CHANGE")
logging.info(ast)
logging.info(applied_opts)
logging.info(ctx.loc)
print_diff(good_src, compare_src)
if ASSERT_DIFF: return True
changes = list(difflib.unified_diff(str(good_src).splitlines(), str(compare_src).splitlines()))
additions += len([x for x in changes if x.startswith("+")])
deletions += len([x for x in changes if x.startswith("-")])
if ASSERT_DIFF: return additions, deletions
if changed > MAX_DIFF_PCT:
logging.warning(f"detected changes in over {MAX_DIFF_PCT}% of kernels. skipping further diff generation.")
early_stop.set()
break
conn.commit()
cur.close()
return bool(changed)
return additions, deletions
# *** generic runner for executing fxn across all rows of a table in parallel
def _pmap(row_count:int, fxn:Callable[[int], bool], maxtasksperchild:int=16) -> None:
def _pmap(row_count:int, fxn:Callable[[int], Union[bool, Tuple[int, int]]], maxtasksperchild:int=16) -> None:
with multiprocessing.get_context("spawn").Pool(multiprocessing.cpu_count(), maxtasksperchild=maxtasksperchild) as pool:
inputs = list(range(0, row_count, PAGE_SIZE))
changed: List[bool] = list(tqdm(pool.imap_unordered(fxn, inputs), total=len(inputs)))
ret: List[Union[bool, Tuple[int, int]]] = list(tqdm(pool.imap_unordered(fxn, inputs), total=len(inputs)))
pool.close()
pool.join()
pool.terminate()
changed = [bool(x[0] or x[1]) if isinstance(x, tuple) else x for x in ret]
insertion, deletions = [x[0] for x in ret if isinstance(x, tuple)], [x[1] for x in ret if isinstance(x, tuple)]
logging.info(f"{sum(changed)} kernels changed{f', {sum(insertion)} insertions(+), {sum(deletions)} deletions(-)' if len(insertion) != 0 else ''}")
if any(changed) and ASSERT_DIFF: raise AssertionError("process replay detected changes")
# *** process replay parallel differ runners

View File

@@ -2,6 +2,6 @@
from tinygrad.helpers import db_connection, VERSION, os
cur = db_connection()
cur.execute(f"drop table if exists kernel_process_replay_{VERSION}")
cur.execute(f"drop table if exists schedule_process_replay_{VERSION}")
cur.execute(f"drop table if exists schedule_diff_{VERSION}")
if os.path.exists(fp:=__file__.replace("reset", "master_schedule")):
os.system(f"rm -rf {fp}")

View File

@@ -36,14 +36,14 @@ void test(int* restrict a, const int* restrict b) {
}
"""
offset = helper_append_replay(ast, "test", test_src)
assert diff_kernel(offset-1)
assert diff_kernel(offset-1) == (5, 4)
def test_identical_run(self):
out = Tensor([1, 2, 3])+1
ast = out.schedule()[-1].ast
test_prg = Kernel(ast, ClangRenderer()).to_program()
offset = helper_append_replay(ast, test_prg.name, test_prg.src)
assert not diff_kernel(offset)
assert diff_kernel(offset) == (0, 0)
if __name__ == "__main__":
unittest.main()