move memory into schedule (#4597)

This commit is contained in:
George Hotz
2024-05-15 07:54:20 -07:00
committed by GitHub
parent a4a23c40a0
commit 53d082a2aa
6 changed files with 45 additions and 52 deletions

View File

@@ -5,9 +5,8 @@ from tinygrad import Device, nn, Tensor, dtypes, Variable
Device.DEFAULT = "CLANG"
from train_gpt2 import GPT, GPTConfig
from tinygrad.helpers import dedup, to_function_name, flatten, getenv, GRAPH, GlobalCounters, ansilen, to_function_name
from tinygrad.engine.schedule import create_schedule
from tinygrad.engine.schedule import create_schedule, memory_planner
from tinygrad.engine.realize import get_linearizer, run_schedule
from tinygrad.engine.memory import memory_planner
from tinygrad.ops import BufferOps, LoadOps
TIMING = getenv("TIMING")

View File

@@ -18,8 +18,7 @@ from tinygrad.dtype import ImageDType
from tinygrad.device import Buffer
from tinygrad.helpers import partition, Context, fetch, getenv, DEBUG
from tinygrad.engine.realize import run_schedule, lower_schedule, ExecItem, CompiledRunner
from tinygrad.engine.memory import memory_planner
from tinygrad.engine.schedule import ScheduleItem, create_schedule
from tinygrad.engine.schedule import ScheduleItem, create_schedule, memory_planner
from tinygrad.ops import LoadOps
Device.DEFAULT = "GPU"

View File

@@ -9,7 +9,7 @@ from tinygrad.dtype import DType
from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.shape.symbolic import Variable, sint
from tinygrad.engine.realize import ExecItem, capturing, EmptyOp, ViewOp, BufferXfer, CompiledRunner, Runner
from tinygrad.engine.memory import _internal_memory_planner
from tinygrad.engine.schedule import _internal_memory_planner
from tinygrad.nn.state import get_parameters
from weakref import WeakKeyDictionary

View File

@@ -1,43 +0,0 @@
from typing import List, Dict, DefaultDict, Tuple, Union
from collections import defaultdict
from tinygrad.dtype import DType
from tinygrad.device import Buffer
from tinygrad.helpers import getenv, DEBUG, dedup
from tinygrad.engine.schedule import ScheduleItem
def _internal_memory_planner(buffers:List[Union[List[Buffer], Tuple[Buffer, ...]]], debug_prefix="") -> Dict[Buffer, Buffer]:
  """Compute a buffer -> replacement-buffer mapping that lets buffers share storage.

  `buffers` is an ordered list of buffer groups. Walking the groups in order, every
  buffer that is neither allocated nor has `lb_refcount > 0` is given a replacement:
  base buffers draw from a free list keyed by (device, size, dtype), and a replacement
  goes back on that free list once the group holding the buffer's last appearance is reached.
  View buffers (`_base is not None`) get a new Buffer at the same offset over the
  replacement of their base, or over the base itself when it has no replacement.

  Returns the mapping, or an empty dict when getenv("NO_MEMORY_PLANNER") is truthy.
  `debug_prefix` is prepended to the summary printed when DEBUG >= 1 and the plan
  ends up with fewer distinct buffers than it started with.
  """
  if getenv("NO_MEMORY_PLANNER"): return {}

  # index of the last group each buffer shows up in (later groups overwrite earlier ones)
  final_use = {b: idx for idx, group in enumerate(buffers) for b in group}

  replacements: Dict[Buffer, Buffer] = {}
  # released replacements per (device, size, dtype); the most recently released one is reused first
  free_pool: DefaultDict[Tuple[str, int, DType], List[Buffer]] = defaultdict(list)

  for idx, group in enumerate(buffers):
    for b in group:
      # only unallocated buffers with no remaining lb references may be replaced
      if b.is_allocated() or b.lb_refcount > 0: continue

      if b._base is not None:
        # view: rebuild it on top of whatever currently stands in for its base
        replacements[b] = Buffer(b.device, b.size, b.dtype, base=replacements.get(b._base, b._base), offset=b.offset)
        continue

      spec = (b.device, b.size, b.dtype)
      if b not in replacements:
        pool = free_pool[spec]
        replacements[b] = pool.pop() if pool else Buffer(*spec)
      if idx == final_use[b]:
        # last appearance: hand the replacement back, never listing it twice
        if replacements[b] not in free_pool[spec]: free_pool[spec].append(replacements[b])

  if DEBUG >= 1:
    ak, av = dedup(replacements.keys()), dedup(replacements.values())
    if len(ak) != len(av):
      print(debug_prefix+f"memory reduced from {sum([x.nbytes for x in ak])/1e6:.2f} MB -> {sum([x.nbytes for x in av])/1e6:.2f} MB,",
            f"{len(ak)} -> {len(av)} bufs")
  return replacements
def memory_planner(schedule:List[ScheduleItem]) -> List[ScheduleItem]:
  """Return new ScheduleItems whose buffers are swapped for the memory planner's replacements."""
  # one buffer group per schedule item, in schedule order
  remap = _internal_memory_planner([item.bufs for item in schedule])
  planned: List[ScheduleItem] = []
  for item in schedule:
    # buffers without an entry in the plan are kept as they are
    new_bufs = tuple(remap.get(b, b) for b in item.bufs)
    planned.append(ScheduleItem(item.ast, new_bufs))
  return planned

View File

@@ -1,12 +1,12 @@
import sys, pickle, atexit
from collections import defaultdict, deque
from dataclasses import dataclass
from typing import Tuple, List, Dict, Optional, Set, DefaultDict
from typing import Tuple, List, Dict, Optional, Set, DefaultDict, Union
from tinygrad.ops import LoadOps, BufferOps, LazyOp, ReduceOps, ConstBuffer, MemBuffer, UNSAFE_PAD_OPS, UnaryOps
from tinygrad.engine.graph import log_lazybuffer, realized_lazybuffer
from tinygrad.helpers import GRAPH, DEBUG, MULTIOUTPUT, SAVE_SCHEDULE, GlobalCounters, prod, dedup, all_int, merge_dicts, getenv
from tinygrad.shape.symbolic import Variable
from tinygrad.dtype import ImageDType, dtypes
from tinygrad.dtype import ImageDType, dtypes, DType
from tinygrad.lazy import LazyBuffer
from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.device import Buffer
@@ -318,3 +318,42 @@ def create_schedule(outs:List[LazyBuffer], seen:Optional[Set[LazyBuffer]]=None)
schedule, var_vals = create_schedule_with_vars(outs, seen)
assert len(var_vals) == 0
return schedule
# *** memory planning ***
def _internal_memory_planner(buffers:List[Union[List[Buffer], Tuple[Buffer, ...]]], debug_prefix="") -> Dict[Buffer, Buffer]:
  """Compute a buffer -> replacement-buffer mapping that lets buffers share storage.

  `buffers` is an ordered list of buffer groups. Walking the groups in order, every
  buffer that is neither allocated nor has `lb_refcount > 0` is given a replacement:
  base buffers draw from a free list keyed by (device, size, dtype), and a replacement
  goes back on that free list once the group holding the buffer's last appearance is reached.
  View buffers (`_base is not None`) get a new Buffer at the same offset over the
  replacement of their base, or over the base itself when it has no replacement.

  Returns the mapping, or an empty dict when getenv("NO_MEMORY_PLANNER") is truthy.
  `debug_prefix` is prepended to the summary printed when DEBUG >= 1 and the plan
  ends up with fewer distinct buffers than it started with.
  """
  if getenv("NO_MEMORY_PLANNER"): return {}

  # index of the last group each buffer shows up in (later groups overwrite earlier ones)
  final_use = {b: idx for idx, group in enumerate(buffers) for b in group}

  replacements: Dict[Buffer, Buffer] = {}
  # released replacements per (device, size, dtype); the most recently released one is reused first
  free_pool: DefaultDict[Tuple[str, int, DType], List[Buffer]] = defaultdict(list)

  for idx, group in enumerate(buffers):
    for b in group:
      # only unallocated buffers with no remaining lb references may be replaced
      if b.is_allocated() or b.lb_refcount > 0: continue

      if b._base is not None:
        # view: rebuild it on top of whatever currently stands in for its base
        replacements[b] = Buffer(b.device, b.size, b.dtype, base=replacements.get(b._base, b._base), offset=b.offset)
        continue

      spec = (b.device, b.size, b.dtype)
      if b not in replacements:
        pool = free_pool[spec]
        replacements[b] = pool.pop() if pool else Buffer(*spec)
      if idx == final_use[b]:
        # last appearance: hand the replacement back, never listing it twice
        if replacements[b] not in free_pool[spec]: free_pool[spec].append(replacements[b])

  if DEBUG >= 1:
    ak, av = dedup(replacements.keys()), dedup(replacements.values())
    if len(ak) != len(av):
      print(debug_prefix+f"memory reduced from {sum([x.nbytes for x in ak])/1e6:.2f} MB -> {sum([x.nbytes for x in av])/1e6:.2f} MB,",
            f"{len(ak)} -> {len(av)} bufs")
  return replacements
def memory_planner(schedule:List[ScheduleItem]) -> List[ScheduleItem]:
  """Return new ScheduleItems whose buffers are swapped for the memory planner's replacements."""
  # one buffer group per schedule item, in schedule order
  remap = _internal_memory_planner([item.bufs for item in schedule])
  planned: List[ScheduleItem] = []
  for item in schedule:
    # buffers without an entry in the plan are kept as they are
    new_bufs = tuple(remap.get(b, b) for b in item.bufs)
    planned.append(ScheduleItem(item.ast, new_bufs))
  return planned

View File

@@ -16,8 +16,7 @@ from tinygrad.device import Buffer, BufferOptions
from tinygrad.device import Device
from tinygrad.shape.symbolic import sint, Variable, MulNode, Node
from tinygrad.engine.realize import run_schedule
from tinygrad.engine.memory import memory_planner
from tinygrad.engine.schedule import ScheduleItem, create_schedule_with_vars
from tinygrad.engine.schedule import ScheduleItem, create_schedule_with_vars, memory_planner
# **** start with two base classes, Tensor and Function ****