enable gep noop rule

This commit is contained in:
ttomsa
2026-03-19 20:54:00 +00:00
parent acdc232d65
commit e81878abd9
2 changed files with 4 additions and 6 deletions

View File

@@ -45,7 +45,6 @@ class ISARenderer(Renderer):
post_regalloc_matcher: PatternMatcher
# Base-class default: answers False for every UOp.
# NOTE(review): presumably arch renderers override this for instructions whose destination must share a register with a source -- confirm
def is_two_address(self, x:UOp) -> bool: return False
# Base-class default: answers False for every UOp.
# NOTE(review): presumably an override returns True for values that should be recomputed instead of spilled -- confirm
def should_rematerialize(self, x:UOp) -> bool: return False
# Arch-specific hook: the base class only raises NotImplementedError, so concrete renderers must provide it.
# NOTE(review): presumably returns a UOp that moves x into reg -- confirm against the arch renderers
def copy(self, x:UOp, reg:Register) -> UOp: raise NotImplementedError("arch specific")
# Arch-specific hook: the base class only raises NotImplementedError, so concrete renderers must provide it.
# NOTE(review): presumably returns a UOp that stores x to the stack slot described by disp -- confirm
def spill(self, disp:UOp, x:UOp) -> UOp: raise NotImplementedError("arch specific")
# Arch-specific hook: the base class only raises NotImplementedError, so concrete renderers must provide it.
# NOTE(review): presumably returns a UOp that reloads x from the stack slot described by disp into reg -- confirm
def fill(self, disp:UOp, x:UOp, reg:Register) -> UOp: raise NotImplementedError("arch specific")

View File

@@ -232,7 +232,7 @@ reg_strs = {"rax": {4:"eax", 2:"ax", 1:"al"}, "rcx": {4:"ecx", 2:"cx", 1:"cl"},
# a load is only folded when it is used once: s must be a LOAD with a single entry in ctx.uses
# and must occur exactly once among the sources of x
def is_foldable_load(ctx:IselContext, x:UOp, s:UOp) -> bool:
  if s.op is not Ops.LOAD: return False
  use_count, src_count = len(ctx.uses[s]), x.src.count(s)
  return use_count == src_count == 1
# i-th source of x; when that source is a GEP, return the GEP's first source instead
def base(x:UOp, i:int) -> UOp:
  src = x.src[i]
  if src.op is Ops.GEP: return src.src[0]
  return src
# first element of the GEP arg when the i-th source of x is a GEP, otherwise 0
# (single definition: the earlier duplicate was immediately rebound by this one and never took effect)
def lane(x:UOp, i:int) -> int:
  src = x.src[i]
  return src.arg[0] if src.op is Ops.GEP else 0
# maps float16/float32/float64 to int16/int32/int64; any other dtype is not in the table and raises KeyError
def to_int(dt:DType):
  int_for_float = {dtypes.float16: dtypes.int16, dtypes.float32: dtypes.int32, dtypes.float64: dtypes.int64}
  return int_for_float[dt]
# DEFINE_REG instruction of dtype dt; the tag is a 1-tuple holding reg when one is given, else None
def def_reg(dt:DType, reg:Register|None=None) -> UOp:
  reg_tag = (reg,) if reg is not None else None
  return UOp(Ops.INS, arg=X86Ops.DEFINE_REG, dtype=dt, tag=reg_tag)
# IMM instruction of dtype dt whose tag is v after passing through truncate[dt]
def imm(dt:DType, v:int) -> UOp:
  truncated = truncate[dt](v)
  return UOp(Ops.INS, arg=X86Ops.IMM, dtype=dt, tag=truncated)
@@ -357,10 +357,9 @@ dt_128bit = tuple(dt.vec(l) for dt in dts for l in [16,8,4,2,1] if l*dt.itemsize
isel_matcher = PatternMatcher([
# **** Op -> Op ****
# TODO: this breaks stuff
# float gep(0) is a noop as it just moves the 0th element from one xmm register to another
# this is done here to not interfere with shuffles / gep store fusion
#(UPat(dtype=dtypes.floats).gep(0, name="x"), lambda x: x.replace(op=Ops.NOOP, arg=None)),
# this is done here to not interfere with shuffles
(UPat(dtype=dtypes.floats).gep(0, name="x"), lambda x: x.replace(op=Ops.NOOP, arg=None)),
# range is lowered to acc, cmp, jmp after regalloc
(UPat(Ops.RANGE, src=(UPat.cvar("c"),), allow_any_len=True, name="x"), lambda c,x: x.replace(src=(imm(c.dtype, c.arg),) + x.src[1:])),
(UPat(Ops.RANGE, name="x"), lambda ctx,x: x.replace(tag=(ctx.vreg(WGPR),)) if not isinstance(x.tag, tuple) else None),
@@ -543,7 +542,7 @@ isel_matcher = PatternMatcher([
(UPat(Ops.INDEX, name="x"), lambda x: x.ins(X86Ops.LEA, src=fold_address(x))),
# TODO: fuse stores, very few cases -- store cmp becomes setcc, store gep int becomes vpextr, store bitcast to int becomes vmovd/q
# copy, load, store
# NOTE: copy here violates the spec, it only happens in register allocation when a reg to reg move needs to be inserted
# NOTE: copy here violates the spec, it only happens post register allocation when a reg to reg move needs to be inserted
(UPat(Ops.COPY, dt_128bit, name="x"), lambda x: x.ins(X86Ops.VMOVUPS)),
(UPat(Ops.COPY, dt_64bit, name="x"), lambda x: x.ins(X86Ops.VMOVSD)),
(UPat(Ops.COPY, dt_32bit+dt_16bit, name="x"), lambda x: x.ins(X86Ops.VMOVSS)),