From a3f938dbee2d1fd7d76eb851f37dec5833951619 Mon Sep 17 00:00:00 2001
From: chenyu <chenyu@fastmail.com>
Date: Wed, 23 Apr 2025 14:32:54 -0400
Subject: [PATCH] remove retinanet INITMLPERF from beam script (#10011)

INITMLPERF only controls logging; whether real data is loaded is controlled solely by RUNMLPERF.
---
 examples/mlperf/model_train.py                         | 10 +++++-----
 .../benchmarks/retinanet/tinybox_green/dev_beam.sh     |  3 +--
 .../benchmarks/retinanet/tinybox_red/dev_beam.sh       |  3 +--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/examples/mlperf/model_train.py b/examples/mlperf/model_train.py
index 937a12d9a1..990f5bdbd7 100644
--- a/examples/mlperf/model_train.py
+++ b/examples/mlperf/model_train.py
@@ -361,7 +361,7 @@ def train_retinanet():
   NUM_CLASSES = len(MLPERF_CLASSES)
   BASEDIR = getenv("BASEDIR", BASEDIR)
   BENCHMARK = getenv("BENCHMARK")
-  INITMLPERF = getenv("INITMLPERF")
+  # INITMLPERF = getenv("INITMLPERF")
   RUNMLPERF = getenv("RUNMLPERF")
   config["gpus"] = GPUS = [f"{Device.DEFAULT}:{i}" for i in range(getenv("GPUS", 6))]
 
@@ -479,7 +479,7 @@ def train_retinanet():
     # ** training loop **
     BEAM.value = TRAIN_BEAM
 
-    if INITMLPERF:
+    if not RUNMLPERF:
       i, proc = 0, _fake_data_get(BS)
     else:
       train_dataloader = batch_load_retinanet(train_dataset, False, base_dir_path, batch_size=BS, seed=SEED)
@@ -499,7 +499,7 @@ def train_retinanet():
 
       if len(prev_cookies) == getenv("STORE_COOKIES", 1): prev_cookies = []  # free previous cookies after gpu work has been enqueued
       try:
-        if INITMLPERF:
+        if not RUNMLPERF:
           next_proc = _fake_data_get(BS)
         else:
           next_proc = _data_get(it)
@@ -552,7 +552,7 @@ def train_retinanet():
       if getenv("RESET_STEP", 1): _train_step.reset()
 
       with Tensor.train(mode=False), Tensor.test():
-        if INITMLPERF:
+        if not RUNMLPERF:
           i, proc = 0, _fake_data_get(EVAL_BS, val=(val:=True))
         else:
           val_dataloader = batch_load_retinanet(val_dataset, (val:=True), Path(BASEDIR), batch_size=EVAL_BS, shuffle=False, seed=SEED)
@@ -583,7 +583,7 @@ def train_retinanet():
 
           if len(prev_cookies) == getenv("STORE_COOKIES", 1): prev_cookies = []  # free previous cookies after gpu work has been enqueued
           try:
-            if INITMLPERF:
+            if not RUNMLPERF:
               next_proc = _fake_data_get(EVAL_BS, val=val)
             else:
               next_proc = _data_get(it, val=val)
diff --git a/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/retinanet/tinybox_green/dev_beam.sh b/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/retinanet/tinybox_green/dev_beam.sh
index 18f95a5c5d..fc386ce312 100755
--- a/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/retinanet/tinybox_green/dev_beam.sh
+++ b/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/retinanet/tinybox_green/dev_beam.sh
@@ -9,7 +9,6 @@ export BASEDIR="/raid/datasets/openimages"
 
 export TRAIN_BEAM=2 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5 BEAM_PADTO=0
 
-export INITMLPERF=1
-export BENCHMARK=10 DEBUG=2
+export BENCHMARK=5 DEBUG=2
 
 python examples/mlperf/model_train.py
diff --git a/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/retinanet/tinybox_red/dev_beam.sh b/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/retinanet/tinybox_red/dev_beam.sh
index 18f95a5c5d..fc386ce312 100755
--- a/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/retinanet/tinybox_red/dev_beam.sh
+++ b/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/retinanet/tinybox_red/dev_beam.sh
@@ -9,7 +9,6 @@ export BASEDIR="/raid/datasets/openimages"
 
 export TRAIN_BEAM=2 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5 BEAM_PADTO=0
 
-export INITMLPERF=1
-export BENCHMARK=10 DEBUG=2
+export BENCHMARK=5 DEBUG=2
 
 python examples/mlperf/model_train.py