From a35eef8d5879a901b503dbf867e9ac779ebdbb41 Mon Sep 17 00:00:00 2001
From: chenyu
Date: Tue, 24 Dec 2024 21:09:26 -0500
Subject: [PATCH] optionally output to file in self_tokenize.py (#8399)

can paste the whole tinygrad in gemini this way
---
 examples/self_tokenize.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/examples/self_tokenize.py b/examples/self_tokenize.py
index 372f1ac5a8..26c0e7de94 100644
--- a/examples/self_tokenize.py
+++ b/examples/self_tokenize.py
@@ -1,4 +1,4 @@
-import os, pathlib
+import os, pathlib, argparse
 from examples.llama3 import Tokenizer
 from tabulate import tabulate
 from tinygrad import fetch
@@ -18,7 +18,16 @@ def read_code(base_path):
       ret += [(fullpath.split("tinygrad/", 1)[1], code)]
   return ret
 
+def write_code_to_file(filename, code_list):
+  """Writes the combined code to a specified file."""
+  with open(filename, 'w') as f:
+    f.write('\x00'.join(flatten(code_list)))
+
 if __name__ == "__main__":
+  parser = argparse.ArgumentParser(description="Analyze and optionally save tinygrad code.")
+  parser.add_argument("--output", help="Output file to write the combined code to.")
+  args = parser.parse_args()
+
   ret = read_code(".")
 
   table = []
@@ -33,3 +42,7 @@ if __name__ == "__main__":
 
   encoded = tokenizer.encode(code_str)
   print(f"code has {len(encoded)} tokens")
+
+  if args.output:
+    write_code_to_file(args.output, ret)
+    print(f"Combined code written to {args.output}")
\ No newline at end of file