all realize 2 (#4527)

* all realize 2

* tests fixup

* fix more tests

* fix openpilot

* fix tests

* unneeded
This commit is contained in:
George Hotz
2024-05-10 22:43:09 -07:00
committed by GitHub
parent d2c347fc74
commit 2f970a4fc2
21 changed files with 142 additions and 139 deletions

View File

@@ -59,9 +59,6 @@ tensor_cores: Dict[str, List[TensorCore]] = {
"HSA": [TensorCore(dims=(16,16,16), threads=[(0,8),(0,2),(1,2)], thread_local_sizes=[[16],[16],[4,2]], thread_local_aliases=[ [[0],[0],[2],[-1],[1]], [[1],[2],[0],[-1],[0]], [[1],[2],[-2],[0],[3,-1]] ], dtype_in=di, dtype_out=do) for (di, do) in [(dtypes.half, dtypes.float), (dtypes.half, dtypes.half)]], # noqa: E501
"CUDA": [TensorCore(dims=(8,16,16), threads=[(0,2),(0,2),(1,2),(1,2),(0,2)], thread_local_sizes=[[2,2,2],[2,2],[2,2]], thread_local_aliases=[ [[0],[0],[5],[-2],[0],[-1,1,2,-3],[3,4]], [[3],[4],[0],[0],[5],[-1,1,2,-2],[0]], [[-1],[1],[5],[-2],[2],[0],[3,4]] ], dtype_in=di, dtype_out=do) for (di, do) in ([(dtypes.half, dtypes.float)] if getenv("PTX") else [(dtypes.half, dtypes.float), (dtypes.bfloat16, dtypes.float)])], # noqa: E501
}
# Alias entries: "AMD" and "RHIP" refer to the very same list object as "HSA",
# and "NV" to the same list object as "CUDA" (shared references, not copies).
tensor_cores.update(AMD=tensor_cores["HSA"], RHIP=tensor_cores["HSA"], NV=tensor_cores["CUDA"])
class LocalBuffer(NamedTuple):
name: str