From 8b5bcf309a2ee4f2f6ba3c0f2eee97e5220dd5e3 Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Tue, 11 Jun 2024 02:49:29 +0800 Subject: [PATCH] process replay in all of CI (#4884) --- .github/workflows/test.yml | 53 ++++++++++++++++++++++------- test/test_fusion_op.py | 2 -- test/test_search.py | 3 +- test/testextra/test_export_model.py | 2 -- tinygrad/codegen/linearizer.py | 2 +- 5 files changed, 42 insertions(+), 20 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4161d9dddf..25bbaa8171 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,6 +17,28 @@ on: type: boolean jobs: + check_process_replay: + name: Check process replay + runs-on: ubuntu-latest + outputs: + run_process_replay: ${{ steps.set-env.outputs.run_process_replay }} + timeout-minutes: 5 + + steps: + - name: Checkout Code + uses: actions/checkout@v4 + with: + fetch-depth: 2 # NOTE: this fetches the HEAD commit of the PR + - name: Set process replay + id: set-env + run: | + COMMIT_MESSAGE=$(git show -s --format=%B ${{ github.event.pull_request.head.sha }}) + if { echo "$COMMIT_MESSAGE" | grep -q "\[run_process_replay\]" || [ "${{ github.event.inputs.run_process_replay }}" == "true" ]; } && [ "$GITHUB_REF_NAME" != "master" ]; then + echo "RUN_PROCESS_REPLAY=1" >> $GITHUB_OUTPUT + else + echo "RUN_PROCESS_REPLAY=0" >> $GITHUB_OUTPUT + fi + uops: name: uops tests runs-on: ubuntu-latest @@ -126,6 +148,10 @@ jobs: fail-fast: false matrix: task: [optimage, openpilot, onnx] + needs: check_process_replay + #env: + #RUN_PROCESS_REPLAY: ${{ needs.check_process_replay.outputs.run_process_replay }} + name: ${{ matrix.task=='optimage'&&'GPU OPT and IMAGE Tests' || matrix.task=='openpilot'&&'openpilot (OpenCL) Tests' || matrix.task=='onnx'&&'ONNX+Optimization Tests' }} runs-on: ubuntu-20.04 timeout-minutes: 10 @@ -211,6 +237,9 @@ jobs: - if: ${{ matrix.task == 'onnx' }} name: Test THREEFRY run: PYTHONPATH=. THREEFRY=1 GPU=1 python3 -m pytest test/test_randomness.py test/test_jit.py + - name: Run process replay tests + if: env.RUN_PROCESS_REPLAY == '1' + run: cp test/external/replay_codegen.py ./replay_codegen.py && git fetch origin master && git checkout origin/master && PYTHONPATH=. python3 replay_codegen.py #testwebgpu: # name: WebGPU Tests @@ -256,6 +285,9 @@ jobs: testmetal: name: Metal Tests runs-on: macos-14 + needs: check_process_replay + env: + RUN_PROCESS_REPLAY: ${{ needs.check_process_replay.outputs.run_process_replay }} timeout-minutes: 10 steps: @@ -299,7 +331,9 @@ jobs: run: PYTHONPATH="." METAL=1 CACHELEVEL=0 FUZZ_ALL_ACTIONS=1 DEPTH=2 FUZZ_N=48 FUZZ_MAX_SIZE=10000000 python test/external/fuzz_linearizer.py - name: Fuzz Test models schedule run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py - + - name: Run process replay tests + if: env.RUN_PROCESS_REPLAY == '1' + run: cp test/external/replay_codegen.py ./replay_codegen.py && git fetch origin master && git checkout origin/master && PYTHONPATH=. python3 replay_codegen.py # testwebgl: # name: WebGL Tests @@ -339,6 +373,9 @@ jobs: fail-fast: false matrix: backend: [llvm, clang, gpu, ptx, amd, nv] #, triton] + needs: check_process_replay + env: + RUN_PROCESS_REPLAY: ${{ needs.check_process_replay.outputs.run_process_replay }} name: Tests on (${{ matrix.backend }}) runs-on: ubuntu-latest @@ -347,8 +384,6 @@ jobs: steps: - name: Checkout Code uses: actions/checkout@v4 - with: - fetch-depth: 2 # NOTE: this fetches the HEAD commit of the PR - name: Set up Python 3.11 uses: actions/setup-python@v5 with: @@ -364,12 +399,7 @@ jobs: path: ~/.cache/tinygrad/downloads/ key: downloads-cache-${{ matrix.backend }}-${{ env.DOWNLOAD_CACHE_VERSION }} - name: Set env - run: | - COMMIT_MESSAGE=$(git show -s --format=%B ${{ github.event.pull_request.head.sha }}) - if { echo "$COMMIT_MESSAGE" | grep -q "\[run_process_replay\]" || [ "${{ github.event.inputs.run_process_replay }}" == "true" ]; } && [ "$GITHUB_REF_NAME" != "master" ]; then - echo "RUN_PROCESS_REPLAY=1" >> $GITHUB_ENV - fi - printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' || matrix.backend == 'nv' && 'NV=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV + run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' || matrix.backend == 'nv' && 'NV=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV - name: Install OpenCL if: matrix.backend == 'gpu' run: | @@ -486,10 +516,7 @@ jobs: cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock - name: Run process replay tests if: env.RUN_PROCESS_REPLAY == '1' - run: | - cp test/external/replay_codegen.py ./replay_codegen.py - git fetch origin master && git checkout origin/master - PYTHONPATH=. python3 replay_codegen.py + run: cp test/external/replay_codegen.py ./replay_codegen.py && git fetch origin master && git checkout origin/master && PYTHONPATH=. python3 replay_codegen.py #testunicorn: # name: ARM64 unicorn Test diff --git a/test/test_fusion_op.py b/test/test_fusion_op.py index b0de3aed6b..faab57cf8c 100644 --- a/test/test_fusion_op.py +++ b/test/test_fusion_op.py @@ -4,7 +4,6 @@ import numpy as np from tinygrad import Tensor, dtypes from tinygrad.engine.schedule import create_schedule from tinygrad.engine.realize import lower_schedule_item, run_schedule -from tinygrad.helpers import getenv class TestFusionOp(unittest.TestCase): def test_contiguous_add(self): @@ -23,7 +22,6 @@ class TestFusionOp(unittest.TestCase): outd = out.tolist() assert all(x == 20.0 for x in outd) - @unittest.skipIf(getenv("RUN_PROCESS_REPLAY"), "very slow") def test_recursive_add(self): st = time.perf_counter() a = Tensor([1,2,3,4]) diff --git a/test/test_search.py b/test/test_search.py index 511f838686..057930c514 100644 --- a/test/test_search.py +++ b/test/test_search.py @@ -8,7 +8,7 @@ from tinygrad.device import Device, Buffer from tinygrad.ops import LazyOp, LoadOps, BufferOps, ReduceOps, BinaryOps, MemBuffer, ConstBuffer from tinygrad.tensor import Tensor from tinygrad.dtype import dtypes -from tinygrad.helpers import Context, GlobalCounters, getenv +from tinygrad.helpers import Context, GlobalCounters from tinygrad.engine.realize import capturing from tinygrad.shape.shapetracker import ShapeTracker from tinygrad.shape.view import View @@ -43,7 +43,6 @@ class TestTimeLinearizer(unittest.TestCase): time_linearizer(lin, bufs, allow_test_size=False, cnt=2, disable_cache=True, clear_l2=True) assert GlobalCounters.kernel_count == kernel_count, "kernel count was incremented by time_linearizer" -@unittest.skipIf(getenv("RUN_PROCESS_REPLAY"), "TODO: run process replay for BEAM=2") class TestBEAM(unittest.TestCase): def test_dynamic_beam(self): # TODO: make this infra globally usable diff --git a/test/testextra/test_export_model.py b/test/testextra/test_export_model.py index 9c203f1609..4d0671c39b 100644 --- a/test/testextra/test_export_model.py +++ b/test/testextra/test_export_model.py @@ -1,6 +1,5 @@ import unittest from extra.export_model import export_model, EXPORT_SUPPORTED_DEVICE -from tinygrad.helpers import getenv from tinygrad.tensor import Tensor, Device import json @@ -14,7 +13,6 @@ class MockMultiOutputModel: # TODO: move compile_efficientnet tests here @unittest.skipUnless(Device.DEFAULT in EXPORT_SUPPORTED_DEVICE, f"Model export is not supported on {Device.DEFAULT}") -@unittest.skipIf(getenv("RUN_PROCESS_REPLAY"), "TODO: kernel ordering is non-deterministic") class TextModelExport(unittest.TestCase): def test_multi_input_model_export(self): model = MockMultiInputModel() diff --git a/tinygrad/codegen/linearizer.py b/tinygrad/codegen/linearizer.py index 4d829082a3..dd0395db0f 100644 --- a/tinygrad/codegen/linearizer.py +++ b/tinygrad/codegen/linearizer.py @@ -467,7 +467,7 @@ class Linearizer(Kernel): self.linearize() info = get_lazyop_info(self.ast[0]) src = self.opts.render(to_function_name(self.name), self.uops) - if getenv("RUN_PROCESS_REPLAY"): diskcache_put("process_replay", "".join(map(str,[self.ast,self.applied_opts])), self) + if getenv("RUN_PROCESS_REPLAY"): diskcache_put("process_replay", id(self), self) ops, mem = self.uops.flops_mem() run_count = prod((self.global_size if self.global_size else []) + (self.local_size if self.local_size else [])) # NOTE: we use min here to ignore the indexing FLOPS