From eb77488f85652e83dc2181294fbb338b1ea2e398 Mon Sep 17 00:00:00 2001 From: chenyu Date: Thu, 23 Jan 2025 19:06:05 -0500 Subject: [PATCH] update llama3 70B to use R1 (#8733) --- examples/llama3.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/llama3.py b/examples/llama3.py index e331573d1a..c24ec6ea2a 100644 --- a/examples/llama3.py +++ b/examples/llama3.py @@ -247,11 +247,11 @@ if __name__ == "__main__": fetch("https://huggingface.co/TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R/resolve/main/model-00004-of-00004.safetensors", "model-00004-of-00004.safetensors", subdir="llama3-8b-sfr") args.model = fetch("https://huggingface.co/TriAiExperiments/SFR-Iterative-DPO-LLaMA-3-8B-R/raw/main/model.safetensors.index.json", "model.safetensors.index.json", subdir="llama3-8b-sfr") elif args.size == "70B": - subdir = "Llama-3.1-Nemotron-70B-Instruct-HF" - args.model = fetch("https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/resolve/main/model.safetensors.index.json?download=true", "model.safetensors.index.json", subdir=subdir) + subdir = "DeepSeek-R1-Distill-Llama-70B" + args.model = fetch("https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/resolve/main/model.safetensors.index.json?download=true", "model.safetensors.index.json", subdir=subdir) fetch("https://huggingface.co/bofenghuang/Meta-Llama-3-8B/resolve/main/original/tokenizer.model", "tokenizer.model", subdir=subdir) - for i in range(30): - fetch(f"https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/resolve/main/model-{i+1:05d}-of-00030.safetensors?download=true", f"model-{i+1:05d}-of-00030.safetensors", subdir=subdir) + for i in range(17): + fetch(f"https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/resolve/main/model-{i+1:05d}-of-000017.safetensors?download=true", f"model-{i+1:05d}-of-000017.safetensors", subdir=subdir) assert args.model is not None, "please provide --model option"