From 9a95d87366a3bc6eb25e1fb24feefb95d7e72e52 Mon Sep 17 00:00:00 2001
From: chenyu <chenyu@fastmail.com>
Date: Sun, 7 Apr 2024 11:04:08 -0400
Subject: [PATCH] metal CI run llama with 4 shards (#4103)

This can catch multi-tensor issues on Mac.
---
 .github/workflows/benchmark.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 06d94e8c5d..f9ede11899 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -44,6 +44,8 @@ jobs:
         JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt
     - name: Run LLaMA with BEAM
       run: JIT=1 BEAM=2 CACHELEVEL=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_beam.txt
+    - name: Run LLaMA 7B on 4 (virtual) GPUs
+      run: JIT=1 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 10 --temperature 0  --timing | tee llama_four_gpu.txt
     - name: Run GPT2
       run: |
         JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
@@ -72,6 +74,7 @@ jobs:
           llama_unjitted.txt
           llama_jitted.txt
           llama_beam.txt
+          llama_four_gpu.txt
           gpt2_unjitted.txt
           gpt2_jitted.txt
           gpt2_half.txt