From eecfdd2f6e40dc2ef5602742f3cf2e80babcd9b8 Mon Sep 17 00:00:00 2001
From: George Hotz
Date: Mon, 3 Jun 2024 14:03:16 +0200
Subject: [PATCH] hotfix: fix dataset reading for new llm.c

---
 examples/llm.c/train_gpt2.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/llm.c/train_gpt2.py b/examples/llm.c/train_gpt2.py
index 5aca048ccc..388061dd20 100755
--- a/examples/llm.c/train_gpt2.py
+++ b/examples/llm.c/train_gpt2.py
@@ -142,7 +142,8 @@ if __name__ == "__main__":
   assert os.path.isfile(tokens_bin)
   print(f"loading cached tokens in {tokens_bin}")
   with open(tokens_bin, "rb") as f:
-    tokens = np.frombuffer(f.read(), dtype=np.int32)
+    f.seek(0x400)
+    tokens = np.frombuffer(f.read(), dtype=np.uint16).astype(np.int32)
   tokens = Tensor(tokens)
 
   # lightweight dataloader
@@ -161,7 +162,7 @@ if __name__ == "__main__":
   # forward backward for a few iterations
   data_iter = iter(get_batch())
   x, y = next(data_iter) # we'll overfit this batch below
-  optimizer = nn.optim.Adam(nn.state.get_parameters(model), lr=1e-4)
+  optimizer = nn.optim.AdamW(nn.state.get_parameters(model), lr=1e-4, weight_decay=0)
 
   @TinyJit
   def step(x, y):