diff --git a/examples/llm.c/export.py b/examples/llm.c/export.py index ee1e9650e1..eafe4c3998 100755 --- a/examples/llm.c/export.py +++ b/examples/llm.c/export.py @@ -5,9 +5,8 @@ from tinygrad import Device, nn, Tensor, dtypes, Variable Device.DEFAULT = "CLANG" from train_gpt2 import GPT, GPTConfig from tinygrad.helpers import dedup, to_function_name, flatten, getenv, GRAPH, GlobalCounters, ansilen, to_function_name -from tinygrad.engine.schedule import create_schedule +from tinygrad.engine.schedule import create_schedule, memory_planner from tinygrad.engine.realize import get_linearizer, run_schedule -from tinygrad.engine.memory import memory_planner from tinygrad.ops import BufferOps, LoadOps TIMING = getenv("TIMING") diff --git a/examples/openpilot/compile2.py b/examples/openpilot/compile2.py index 678e657764..bfa9bac0a7 100644 --- a/examples/openpilot/compile2.py +++ b/examples/openpilot/compile2.py @@ -18,8 +18,7 @@ from tinygrad.dtype import ImageDType from tinygrad.device import Buffer from tinygrad.helpers import partition, Context, fetch, getenv, DEBUG from tinygrad.engine.realize import run_schedule, lower_schedule, ExecItem, CompiledRunner -from tinygrad.engine.memory import memory_planner -from tinygrad.engine.schedule import ScheduleItem, create_schedule +from tinygrad.engine.schedule import ScheduleItem, create_schedule, memory_planner from tinygrad.ops import LoadOps Device.DEFAULT = "GPU" diff --git a/tinygrad/engine/jit.py b/tinygrad/engine/jit.py index 3b6d08eafb..02908e8743 100644 --- a/tinygrad/engine/jit.py +++ b/tinygrad/engine/jit.py @@ -9,7 +9,7 @@ from tinygrad.dtype import DType from tinygrad.shape.shapetracker import ShapeTracker from tinygrad.shape.symbolic import Variable, sint from tinygrad.engine.realize import ExecItem, capturing, EmptyOp, ViewOp, BufferXfer, CompiledRunner, Runner -from tinygrad.engine.memory import _internal_memory_planner +from tinygrad.engine.schedule import _internal_memory_planner from tinygrad.nn.state import get_parameters from weakref import WeakKeyDictionary diff --git a/tinygrad/engine/memory.py b/tinygrad/engine/memory.py deleted file mode 100644 index 6a33476d80..0000000000 --- a/tinygrad/engine/memory.py +++ /dev/null @@ -1,43 +0,0 @@ -from typing import List, Dict, DefaultDict, Tuple, Union -from collections import defaultdict -from tinygrad.dtype import DType -from tinygrad.device import Buffer -from tinygrad.helpers import getenv, DEBUG, dedup -from tinygrad.engine.schedule import ScheduleItem - -def _internal_memory_planner(buffers:List[Union[List[Buffer], Tuple[Buffer, ...]]], debug_prefix="") -> Dict[Buffer, Buffer]: - if getenv("NO_MEMORY_PLANNER"): return {} - last_appearance = {} - for i,u in enumerate(buffers): - for buf in u: last_appearance[buf] = i - - # LRU algorithm - assigned: Dict[Buffer, Buffer] = {} - local_cache: DefaultDict[Tuple[str, int, DType], List[Buffer]] = defaultdict(list) - - def handle_buffer(buf): - key = (buf.device, buf.size, buf.dtype) - if buf not in assigned: - if len(ll:=local_cache[key]): assigned[buf] = ll.pop() - else: assigned[buf] = Buffer(*key) - if i == last_appearance[buf]: - if assigned[buf] not in local_cache[key]: local_cache[key].append(assigned[buf]) - - for i,u in enumerate(buffers): - for buf in u: - # all unallocated unparented buffers are fair game to replace - if buf.is_allocated() or buf.lb_refcount > 0: continue - # handle view buffers - if buf._base is not None: - assigned[buf] = Buffer(buf.device, buf.size, buf.dtype, base=assigned.get(buf._base, buf._base), offset=buf.offset) - else: - handle_buffer(buf) - - if DEBUG >= 1 and len(ak:=dedup(assigned.keys())) != len(av:=dedup(assigned.values())): - print(debug_prefix+f"memory reduced from {sum([x.nbytes for x in ak])/1e6:.2f} MB -> {sum([x.nbytes for x in av])/1e6:.2f} MB,", - f"{len(ak)} -> {len(av)} bufs") - return assigned - -def memory_planner(schedule:List[ScheduleItem]) -> List[ScheduleItem]: - assigned = _internal_memory_planner([si.bufs for si in schedule]) - return [ScheduleItem(si.ast, tuple(assigned.get(x, x) for x in si.bufs)) for si in schedule] diff --git a/tinygrad/engine/schedule.py b/tinygrad/engine/schedule.py index 8a0e00a0f9..d8719b5fb6 100644 --- a/tinygrad/engine/schedule.py +++ b/tinygrad/engine/schedule.py @@ -1,12 +1,12 @@ import sys, pickle, atexit from collections import defaultdict, deque from dataclasses import dataclass -from typing import Tuple, List, Dict, Optional, Set, DefaultDict +from typing import Tuple, List, Dict, Optional, Set, DefaultDict, Union from tinygrad.ops import LoadOps, BufferOps, LazyOp, ReduceOps, ConstBuffer, MemBuffer, UNSAFE_PAD_OPS, UnaryOps from tinygrad.engine.graph import log_lazybuffer, realized_lazybuffer from tinygrad.helpers import GRAPH, DEBUG, MULTIOUTPUT, SAVE_SCHEDULE, GlobalCounters, prod, dedup, all_int, merge_dicts, getenv from tinygrad.shape.symbolic import Variable -from tinygrad.dtype import ImageDType, dtypes +from tinygrad.dtype import ImageDType, dtypes, DType from tinygrad.lazy import LazyBuffer from tinygrad.shape.shapetracker import ShapeTracker from tinygrad.device import Buffer @@ -318,3 +318,42 @@ def create_schedule(outs:List[LazyBuffer], seen:Optional[Set[LazyBuffer]]=None) schedule, var_vals = create_schedule_with_vars(outs, seen) assert len(var_vals) == 0 return schedule + +# *** memory planning *** + +def _internal_memory_planner(buffers:List[Union[List[Buffer], Tuple[Buffer, ...]]], debug_prefix="") -> Dict[Buffer, Buffer]: + if getenv("NO_MEMORY_PLANNER"): return {} + last_appearance = {} + for i,u in enumerate(buffers): + for buf in u: last_appearance[buf] = i + + # LRU algorithm + assigned: Dict[Buffer, Buffer] = {} + local_cache: DefaultDict[Tuple[str, int, DType], List[Buffer]] = defaultdict(list) + + def handle_buffer(buf): + key = (buf.device, buf.size, buf.dtype) + if buf not in assigned: + if len(ll:=local_cache[key]): assigned[buf] = ll.pop() + else: assigned[buf] = Buffer(*key) + if i == last_appearance[buf]: + if assigned[buf] not in local_cache[key]: local_cache[key].append(assigned[buf]) + + for i,u in enumerate(buffers): + for buf in u: + # all unallocated unparented buffers are fair game to replace + if buf.is_allocated() or buf.lb_refcount > 0: continue + # handle view buffers + if buf._base is not None: + assigned[buf] = Buffer(buf.device, buf.size, buf.dtype, base=assigned.get(buf._base, buf._base), offset=buf.offset) + else: + handle_buffer(buf) + + if DEBUG >= 1 and len(ak:=dedup(assigned.keys())) != len(av:=dedup(assigned.values())): + print(debug_prefix+f"memory reduced from {sum([x.nbytes for x in ak])/1e6:.2f} MB -> {sum([x.nbytes for x in av])/1e6:.2f} MB,", + f"{len(ak)} -> {len(av)} bufs") + return assigned + +def memory_planner(schedule:List[ScheduleItem]) -> List[ScheduleItem]: + assigned = _internal_memory_planner([si.bufs for si in schedule]) + return [ScheduleItem(si.ast, tuple(assigned.get(x, x) for x in si.bufs)) for si in schedule] diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index 34109ea040..fcc665df07 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -16,8 +16,7 @@ from tinygrad.device import Buffer, BufferOptions from tinygrad.device import Device from tinygrad.shape.symbolic import sint, Variable, MulNode, Node from tinygrad.engine.realize import run_schedule -from tinygrad.engine.memory import memory_planner -from tinygrad.engine.schedule import ScheduleItem, create_schedule_with_vars +from tinygrad.engine.schedule import ScheduleItem, create_schedule_with_vars, memory_planner # **** start with two base classes, Tensor and Function ****