From 73bddc44f69ca4c43d57eb08ea6fc773bb90c7d9 Mon Sep 17 00:00:00 2001
From: Elias Wahl <82230675+Eliulm@users.noreply.github.com>
Date: Mon, 8 Jul 2024 15:07:44 +0200
Subject: [PATCH] Fix fake dataloader (#5326)

---
 examples/handcode_bert_opt.py  |  6 +++---
 examples/mlperf/helpers.py     | 14 +++++++-------
 examples/mlperf/model_train.py |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/examples/handcode_bert_opt.py b/examples/handcode_bert_opt.py
index a0e0bc4ee5..0918123fe3 100644
--- a/examples/handcode_bert_opt.py
+++ b/examples/handcode_bert_opt.py
@@ -30,9 +30,9 @@ if __name__ == "__main__":
   input_ids = Tensor.empty((BS, 512), dtype=dtypes.float32)
   segment_ids = Tensor.empty((BS, 512), dtype=dtypes.float32)
   attention_mask = Tensor.empty((BS, 512), dtype=dtypes.default_float)
-  masked_positions = Tensor.empty((BS, 512), dtype=dtypes.float32)
-  masked_lm_ids = Tensor.empty((BS, 512), dtype=dtypes.float32)
-  masked_lm_weights = Tensor.empty((BS, 512), dtype=dtypes.float32)
+  masked_positions = Tensor.empty((BS, 76), dtype=dtypes.float32)
+  masked_lm_ids = Tensor.empty((BS, 76), dtype=dtypes.float32)
+  masked_lm_weights = Tensor.empty((BS, 76), dtype=dtypes.float32)
   next_sentence_labels = Tensor.empty((BS, 1), dtype=dtypes.float32)
 
   # run model twice to get only what changes, these are the kernels of the model
diff --git a/examples/mlperf/helpers.py b/examples/mlperf/helpers.py
index 04588cd804..dab413e5db 100644
--- a/examples/mlperf/helpers.py
+++ b/examples/mlperf/helpers.py
@@ -230,11 +230,11 @@ def get_data_bert(GPUS:list[str], it):
 
 def get_fake_data_bert(GPUS:list[str], BS:int):
   return {
-    "input_ids": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "input_mask": Tensor.zeros((BS, 512), dtype=dtypes.default_float).contiguous().shard_(GPUS, axis=0),
-    "segment_ids": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "masked_lm_positions": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "masked_lm_ids": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "masked_lm_weights": Tensor.zeros((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
-    "next_sentence_labels": Tensor.zeros((BS, 1), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "input_ids": Tensor.empty((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "input_mask": Tensor.empty((BS, 512), dtype=dtypes.default_float).contiguous().shard_(GPUS, axis=0),
+    "segment_ids": Tensor.empty((BS, 512), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "masked_lm_positions": Tensor.empty((BS, 76), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "masked_lm_ids": Tensor.empty((BS, 76), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "masked_lm_weights": Tensor.empty((BS, 76), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
+    "next_sentence_labels": Tensor.empty((BS, 1), dtype=dtypes.float32).contiguous().shard_(GPUS, axis=0),
   }
diff --git a/examples/mlperf/model_train.py b/examples/mlperf/model_train.py
index c7081cfa4d..ff70c5edc7 100644
--- a/examples/mlperf/model_train.py
+++ b/examples/mlperf/model_train.py
@@ -590,7 +590,7 @@ def train_bert():
 
       for j in tqdm(range(max_eval_steps), desc="Evaluating", total=max_eval_steps, disable=BENCHMARK):
         if INITMLPERF:
-          eval_data = get_fake_data_bert(GPUS, BS)
+          eval_data = get_fake_data_bert(GPUS, EVAL_BS)
         else:
           eval_data = get_data_bert(GPUS, eval_it)
         GlobalCounters.reset()