From 73bddc44f69ca4c43d57eb08ea6fc773bb90c7d9 Mon Sep 17 00:00:00 2001
From: Elias Wahl <82230675+Eliulm@users.noreply.github.com>
Date: Mon, 8 Jul 2024 15:07:44 +0200
Subject: [PATCH] Fix fake dataloader (#5326)

---
 examples/handcode_bert_opt.py  |  6 +++---
 examples/mlperf/helpers.py     | 14 +++++++-------
 examples/mlperf/model_train.py |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/examples/handcode_bert_opt.py b/examples/handcode_bert_opt.py
index a0e0bc4ee5..0918123fe3 100644
--- a/examples/handcode_bert_opt.py
+++ b/examples/handcode_bert_opt.py
@@ -30,9 +30,9 @@ if __name__ == "__main__":
   input_ids = Tensor.empty((BS, 512), dtype=dtypes.float32)
   segment_ids = Tensor.empty((BS, 512), dtype=dtypes.float32)
   attention_mask = Tensor.empty((BS, 512), dtype=dtypes.default_float)
-  masked_positions = Tensor.empty((BS, 512), dtype=dtypes.float32)
-  masked_lm_ids = Tensor.empty((BS, 512), dtype=dtypes.float32)
-  masked_lm_weights = Tensor.empty((BS, 512), dtype=dtypes.float32)
+  masked_positions = Tensor.empty((BS, 76), dtype=dtypes.float32)
+  masked_lm_ids = Tensor.empty((BS, 76), dtype=dtypes.float32)
+  masked_lm_weights = Tensor.empty((BS, 76), dtype=dtypes.float32)
   next_sentence_labels = Tensor.empty((BS, 1), dtype=dtypes.float32)
 
   # run model twice to get only what changes, these are the kernels of the model
diff --git a/examples/mlperf/helpers.py b/examples/mlperf/helpers.py
index 04588cd804..dab413e5db 100644
--- a/examples/mlperf/helpers.py
+++ b/examples/mlperf/helpers.py
@@ -230,11 +230,11 @@ def get_data_bert(GPUS:list[str], it):
 
 def get_fake_data_bert(GPUS:list[str], BS:int):
   return {
-    "input_ids": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "input_mask": Tensor.zeros((BS, 512), dtype=dtypes.default_float).contiguous().shard_(GPUS, axis=0),
-    "segment_ids": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "masked_lm_positions": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "masked_lm_ids": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "masked_lm_weights": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "next_sentence_labels": Tensor.zeros((BS, 1), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "input_ids": Tensor.empty((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "input_mask": Tensor.empty((BS, 512), dtype=dtypes.default_float).contiguous().shard_(GPUS, axis=0),
+    "segment_ids": Tensor.empty((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "masked_lm_positions": Tensor.empty((BS, 76), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "masked_lm_ids": Tensor.empty((BS, 76), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "masked_lm_weights": Tensor.empty((BS, 76), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "next_sentence_labels": Tensor.empty((BS, 1), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
   }
diff --git a/examples/mlperf/model_train.py b/examples/mlperf/model_train.py
index c7081cfa4d..ff70c5edc7 100644
--- a/examples/mlperf/model_train.py
+++ b/examples/mlperf/model_train.py
@@ -590,7 +590,7 @@ def train_bert():
 
       for j in tqdm(range(max_eval_steps), desc="Evaluating", total=max_eval_steps, disable=BENCHMARK):
         if INITMLPERF:
-          eval_data = get_fake_data_bert(GPUS, BS)
+          eval_data = get_fake_data_bert(GPUS, EVAL_BS)
         else:
           eval_data = get_data_bert(GPUS, eval_it)
         GlobalCounters.reset()