Files
tinygrad/test/amd/hw/test_vopc.py

965 lines
37 KiB
Python

"""Tests for VOPC instructions - vector compare operations.
Includes: v_cmp_class_f32, v_cmp_class_f16, v_cmp_eq_*, v_cmp_lt_*, v_cmp_gt_*
"""
import unittest
from test.amd.hw.helpers import *
VCC = 106 # SGPR index for VCC_LO
class TestCmpClass(unittest.TestCase):
"""Tests for V_CMP_CLASS_F32 float classification."""
def test_cmp_class_quiet_nan(self):
"""V_CMP_CLASS_F32 detects quiet NaN."""
quiet_nan = 0x7fc00000
instructions = [
s_mov_b32(s[0], quiet_nan),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0b0000000010), # bit 1 = quiet NaN
v_cmp_class_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect quiet NaN")
def test_cmp_class_signaling_nan(self):
"""V_CMP_CLASS_F32 detects signaling NaN."""
signal_nan = 0x7f800001
instructions = [
s_mov_b32(s[0], signal_nan),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0b0000000001), # bit 0 = signaling NaN
v_cmp_class_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect signaling NaN")
def test_cmp_class_positive_inf(self):
"""V_CMP_CLASS_F32 detects +inf."""
pos_inf = 0x7f800000
instructions = [
s_mov_b32(s[0], pos_inf),
s_mov_b32(s[1], 0b1000000000), # bit 9 = +inf
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[1]),
v_cmp_class_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect +inf")
def test_cmp_class_negative_inf(self):
"""V_CMP_CLASS_F32 detects -inf."""
neg_inf = 0xff800000
instructions = [
s_mov_b32(s[0], neg_inf),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0b0000000100), # bit 2 = -inf
v_cmp_class_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect -inf")
def test_cmp_class_normal_positive(self):
"""V_CMP_CLASS_F32 detects positive normal."""
instructions = [
v_mov_b32_e32(v[0], 1.0),
s_mov_b32(s[1], 0b0100000000), # bit 8 = positive normal
v_mov_b32_e32(v[1], s[1]),
v_cmp_class_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect positive normal")
def test_cmp_class_normal_negative(self):
"""V_CMP_CLASS_F32 detects negative normal."""
instructions = [
v_mov_b32_e32(v[0], -1.0),
v_mov_b32_e32(v[1], 0b0000001000), # bit 3 = negative normal
v_cmp_class_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect negative normal")
def test_cmp_class_quiet_nan_not_signaling(self):
"""Quiet NaN does not match signaling NaN mask."""
quiet_nan = 0x7fc00000
instructions = [
s_mov_b32(s[0], quiet_nan),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0b0000000001), # bit 0 = signaling NaN only
v_cmp_class_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "Quiet NaN should not match signaling mask")
def test_cmp_class_signaling_nan_not_quiet(self):
"""Signaling NaN does not match quiet NaN mask."""
signal_nan = 0x7f800001
instructions = [
s_mov_b32(s[0], signal_nan),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0b0000000010), # bit 1 = quiet NaN only
v_cmp_class_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "Signaling NaN should not match quiet mask")
def test_v_cmp_lg_f32_nan(self):
"""v_cmp_lg_f32 is ordered not-equal (<>): NaN <> x should be False per IEEE 754."""
quiet_nan = 0x7fc00000
one_f32 = 0x3f800000 # 1.0f
instructions = [
s_mov_b32(s[0], quiet_nan),
v_mov_b32_e32(v[0], s[0]),
s_mov_b32(s[1], one_f32),
v_mov_b32_e32(v[1], s[1]),
v_cmp_lg_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "v_cmp_lg_f32(NaN, 1.0) should be 0")
def test_v_cmp_neq_f32_nan(self):
"""v_cmp_neq_f32 is unordered not-equal (!=): NaN != x should be True per IEEE 754."""
quiet_nan = 0x7fc00000
one_f32 = 0x3f800000 # 1.0f
instructions = [
s_mov_b32(s[0], quiet_nan),
v_mov_b32_e32(v[0], s[0]),
s_mov_b32(s[1], one_f32),
v_mov_b32_e32(v[1], s[1]),
v_cmp_neq_f32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "v_cmp_neq_f32(NaN, 1.0) should be 1")
def test_v_cmp_sets_vcc_bits(self):
"""V_CMP_EQ sets VCC bits based on per-lane comparison."""
instructions = [
s_mov_b32(s[0], 5),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[0]),
v_cmp_eq_u32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=4)
self.assertEqual(st.vcc & 0xf, 0xf, "All lanes should match")
class TestCmpClassF16(unittest.TestCase):
"""Tests for V_CMP_CLASS_F16 float classification.
Class bit mapping:
bit 0 = signaling NaN
bit 1 = quiet NaN
bit 2 = -infinity
bit 3 = -normal
bit 4 = -denormal
bit 5 = -zero
bit 6 = +zero
bit 7 = +denormal
bit 8 = +normal
bit 9 = +infinity
"""
def test_cmp_class_f16_positive_zero(self):
"""V_CMP_CLASS_F16: +zero matches bit 6."""
instructions = [
v_mov_b32_e32(v[0], 0x0000), # f16 +0.0
v_mov_b32_e32(v[1], 0x40), # bit 6 = +zero
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect positive zero")
def test_cmp_class_f16_negative_zero(self):
"""V_CMP_CLASS_F16: -zero matches bit 5."""
instructions = [
s_mov_b32(s[0], 0x8000), # f16 -0.0
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0x20), # bit 5 = -zero
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect negative zero")
def test_cmp_class_f16_positive_normal(self):
"""V_CMP_CLASS_F16: +1.0 (normal) matches bit 8."""
instructions = [
s_mov_b32(s[0], 0x3c00), # f16 +1.0
s_mov_b32(s[1], 0x100), # bit 8 = +normal
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[1]),
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect positive normal")
def test_cmp_class_f16_negative_normal(self):
"""V_CMP_CLASS_F16: -1.0 (normal) matches bit 3."""
instructions = [
s_mov_b32(s[0], 0xbc00), # f16 -1.0
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0x08), # bit 3 = -normal
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect negative normal")
def test_cmp_class_f16_positive_infinity(self):
"""V_CMP_CLASS_F16: +inf matches bit 9."""
instructions = [
s_mov_b32(s[0], 0x7c00), # f16 +inf
s_mov_b32(s[1], 0x200), # bit 9 = +inf
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[1]),
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect positive infinity")
def test_cmp_class_f16_negative_infinity(self):
"""V_CMP_CLASS_F16: -inf matches bit 2."""
instructions = [
s_mov_b32(s[0], 0xfc00), # f16 -inf
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0x04), # bit 2 = -inf
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect negative infinity")
def test_cmp_class_f16_quiet_nan(self):
"""V_CMP_CLASS_F16: quiet NaN matches bit 1."""
instructions = [
s_mov_b32(s[0], 0x7e00), # f16 quiet NaN
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0x02), # bit 1 = quiet NaN
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect quiet NaN")
def test_cmp_class_f16_signaling_nan(self):
"""V_CMP_CLASS_F16: signaling NaN matches bit 0."""
instructions = [
s_mov_b32(s[0], 0x7c01), # f16 signaling NaN
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0x01), # bit 0 = signaling NaN
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect signaling NaN")
def test_cmp_class_f16_positive_denormal(self):
"""V_CMP_CLASS_F16: positive denormal matches bit 7."""
instructions = [
v_mov_b32_e32(v[0], 1), # f16 +denormal (0x0001)
v_mov_b32_e32(v[1], 0x80), # bit 7 = +denormal
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect positive denormal")
def test_cmp_class_f16_negative_denormal(self):
"""V_CMP_CLASS_F16: negative denormal matches bit 4."""
instructions = [
s_mov_b32(s[0], 0x8001), # f16 -denormal
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], 0x10), # bit 4 = -denormal
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Should detect negative denormal")
def test_cmp_class_f16_combined_mask_zeros(self):
"""V_CMP_CLASS_F16: mask 0x60 covers both +zero and -zero."""
instructions = [
v_mov_b32_e32(v[0], 0), # f16 +0.0
v_mov_b32_e32(v[1], 0x60), # bits 5 and 6 (+-zero)
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with mask 0x60")
def test_cmp_class_f16_combined_mask_1f8(self):
"""V_CMP_CLASS_F16: mask 0x1f8 covers -normal,-denorm,-zero,+zero,+denorm,+normal.
This is the exact mask used in the f16 sin kernel at PC=46.
"""
instructions = [
v_mov_b32_e32(v[0], 0), # f16 +0.0
s_mov_b32(s[0], 0x1f8),
v_mov_b32_e32(v[1], s[0]), # mask 0x1f8
v_cmp_class_f16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with mask 0x1f8")
def test_cmp_class_f16_vop3_encoding(self):
"""V_CMP_CLASS_F16 in VOP3 encoding (v_cmp_class_f16_e64)."""
instructions = [
v_mov_b32_e32(v[0], 0), # f16 +0.0
s_mov_b32(s[0], 0x1f8), # class mask
v_cmp_class_f16_e64(VCC_LO, v[0], s[0]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with VOP3 encoding")
def test_cmp_class_f16_vop3_normal_positive(self):
"""V_CMP_CLASS_F16 VOP3 encoding with +1.0 (normal)."""
instructions = [
s_mov_b32(s[0], 0x3c00), # f16 +1.0
v_mov_b32_e32(v[0], s[0]),
s_mov_b32(s[1], 0x1f8), # class mask
v_cmp_class_f16_e64(VCC_LO, v[0], s[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +1.0 (normal) with mask 0x1f8")
def test_cmp_class_f16_vop3_nan_fails_mask(self):
"""V_CMP_CLASS_F16 VOP3: NaN should NOT match mask 0x1f8 (no NaN bits set)."""
instructions = [
s_mov_b32(s[0], 0x7e00), # f16 quiet NaN
v_mov_b32_e32(v[0], s[0]),
s_mov_b32(s[1], 0x1f8), # class mask
v_cmp_class_f16_e64(VCC_LO, v[0], s[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "VCC should be 0 for NaN with mask 0x1f8 (no NaN bits)")
def test_cmp_class_f16_vop3_inf_fails_mask(self):
"""V_CMP_CLASS_F16 VOP3: +inf should NOT match mask 0x1f8 (no inf bits set)."""
instructions = [
s_mov_b32(s[0], 0x7c00), # f16 +inf
v_mov_b32_e32(v[0], s[0]),
s_mov_b32(s[1], 0x1f8), # class mask
v_cmp_class_f16_e64(VCC_LO, v[0], s[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "VCC should be 0 for +inf with mask 0x1f8 (no inf bits)")
class TestCmpInt(unittest.TestCase):
"""Tests for integer comparison operations."""
def test_v_cmp_eq_u32(self):
"""V_CMP_EQ_U32 sets VCC bits based on per-lane comparison."""
instructions = [
s_mov_b32(s[0], 5),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[0]),
v_cmp_eq_u32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=4)
self.assertEqual(st.vcc & 0xf, 0xf, "All lanes should match")
def test_v_cmp_ne_u32_with_zero(self):
"""V_CMP_NE_U32: compare with zero, used for int->bool cast."""
instructions = [
v_mov_b32_e32(v[1], 0),
v_cmp_eq_u32_e32(1, v[255]), # vcc = (lane == 1)
v_cndmask_b32_e64(v[1], v[1], 1, VCC_LO), # v1[lane1] = 1
v_cmp_ne_u32_e32(0, v[1]), # vcc = (0 != v1)
v_cndmask_b32_e64(v[0], 0, 1, VCC_LO), # v0 = vcc ? 1 : 0
]
st = run_program(instructions, n_lanes=2)
self.assertEqual(st.vgpr[0][0], 0, "lane 0: 0 != 0 should be false")
self.assertEqual(st.vgpr[1][0], 1, "lane 1: 0 != 1 should be true")
self.assertEqual(st.vcc & 0x3, 0x2, "VCC should be 0b10")
def test_v_cmp_ne_u32_all_nonzero(self):
"""V_CMP_NE_U32: all lanes have nonzero values."""
instructions = [
v_mov_b32_e32(v[1], 5),
v_cmp_ne_u32_e32(0, v[1]),
]
st = run_program(instructions, n_lanes=4)
self.assertEqual(st.vcc & 0xf, 0xf, "All lanes should be != 0")
def test_cmp_eq_u16_opsel_lo_lo(self):
"""V_CMP_EQ_U16 comparing lo halves."""
instructions = [
s_mov_b32(s[0], 0x12340005), # lo=5, hi=0x1234
s_mov_b32(s[1], 0xABCD0005), # lo=5, hi=0xABCD
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[1]),
v_cmp_eq_u16_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Lo halves should be equal")
def test_cmp_eq_u16_opsel_hi_hi(self):
"""V_CMP_EQ_U16 comparing hi halves with VOP3 opsel."""
instructions = [
s_mov_b32(s[2], 0x00051234), # hi=5, lo=0x1234
v_mov_b32_e32(v[0], s[2]),
s_mov_b32(s[2], 0x0005ABCD), # hi=5, lo=0xABCD
v_mov_b32_e32(v[1], s[2]),
v_cmp_eq_u16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.sgpr[0] & 1, 1, "Hi halves should be equal: 5==5")
def test_cmp_eq_u16_opsel_hi_hi_equal(self):
"""V_CMP_EQ_U16 VOP3 with opsel=3 compares hi halves (equal case)."""
instructions = [
s_mov_b32(s[2], 0x12340005), # lo=5, hi=0x1234
v_mov_b32_e32(v[0], s[2]),
s_mov_b32(s[2], 0x12340009), # lo=9, hi=0x1234
v_mov_b32_e32(v[1], s[2]),
v_cmp_eq_u16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.sgpr[0] & 1, 1, "hi==hi should be true: 0x1234==0x1234")
def test_cmp_gt_u16_opsel_hi(self):
"""V_CMP_GT_U16 VOP3 with opsel=3 compares hi halves."""
instructions = [
s_mov_b32(s[2], 0x99990005), # lo=5, hi=0x9999
v_mov_b32_e32(v[0], s[2]),
s_mov_b32(s[2], 0x12340005), # lo=5, hi=0x1234
v_mov_b32_e32(v[1], s[2]),
v_cmp_gt_u16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.sgpr[0] & 1, 1, "hi>hi should be true: 0x9999>0x1234")
class TestCmpFloat(unittest.TestCase):
"""Tests for float comparison operations."""
def test_v_cmp_lt_f16_vsrc1_hi(self):
"""V_CMP_LT_F16 with both operands from high half using VOP3 opsel."""
instructions = [
s_mov_b32(s[2], 0x3c000000), # hi=1.0 (f16), lo=0
v_mov_b32_e32(v[0], s[2]),
s_mov_b32(s[2], 0x40000000), # hi=2.0 (f16), lo=0
v_mov_b32_e32(v[1], s[2]),
v_cmp_lt_f16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.sgpr[0] & 1, 1, "1.0 < 2.0 should be true")
def test_v_cmp_gt_f16_vsrc1_hi(self):
"""V_CMP_GT_F16 with both operands from high half using VOP3 opsel."""
instructions = [
s_mov_b32(s[2], 0x40000000), # hi=2.0 (f16), lo=0
v_mov_b32_e32(v[0], s[2]),
s_mov_b32(s[2], 0x3c000000), # hi=1.0 (f16), lo=0
v_mov_b32_e32(v[1], s[2]),
v_cmp_gt_f16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.sgpr[0] & 1, 1, "2.0 > 1.0 should be true")
def test_v_cmp_eq_f16_vsrc1_hi_equal(self):
"""v_cmp_eq_f16 with equal low and high halves."""
instructions = [
s_mov_b32(s[0], 0x42004200), # hi=3.0 (0x4200), lo=3.0 (0x4200)
v_mov_b32_e32(v[0], s[0]),
v_cmp_eq_f16_e32(v[0], v[0].h),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Expected vcc=1 (3.0 == 3.0)")
def test_v_cmp_neq_f16_vsrc1_hi(self):
"""v_cmp_neq_f16 with different low and high halves."""
instructions = [
s_mov_b32(s[0], 0x40003c00), # hi=2.0 (0x4000), lo=1.0 (0x3c00)
v_mov_b32_e32(v[0], s[0]),
v_cmp_lg_f16_e32(v[0], v[0].h),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "Expected vcc=1 (1.0 != 2.0)")
def test_v_cmp_nge_f16_inf_self(self):
"""v_cmp_nge_f16 comparing -inf with itself (unordered less than).
Regression test: -inf < -inf should be false (IEEE 754).
"""
instructions = [
s_mov_b32(s[0], 0xFC00FC00), # both halves = -inf (0xFC00)
v_mov_b32_e32(v[0], s[0]),
v_cmp_nge_f16_e32(v[0], v[0].h),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "Expected vcc=0 (-inf >= -inf)")
def test_v_cmp_f16_multilane(self):
"""v_cmp_lt_f16 with vsrc1=v128 across multiple lanes."""
instructions = [
# Lane 0: v0 = 0x40003c00 (hi=2.0, lo=1.0) -> 1.0 < 2.0 = true
# Lane 1: v0 = 0x3c004000 (hi=1.0, lo=2.0) -> 2.0 < 1.0 = false
v_mov_b32_e32(v[0], 0x40003c00), # default
v_cmp_eq_u32_e32(1, v[255]), # vcc = (lane == 1)
v_cndmask_b32_e64(v[0], v[0], 0x3c004000, SrcEnum.VCC_LO),
v_cmp_lt_f16_e32(v[0], v[0].h),
]
st = run_program(instructions, n_lanes=2)
self.assertEqual(st.vcc & 1, 1, "Lane 0: expected vcc=1 (1.0 < 2.0)")
self.assertEqual((st.vcc >> 1) & 1, 0, "Lane 1: expected vcc=0 (2.0 < 1.0)")
class TestVOP3VOPCModifiers(unittest.TestCase):
"""Tests for VOP3 VOPC with abs/neg modifiers."""
def test_v_cmp_ge_f32_abs_both(self):
"""v_cmp_ge_f32 with abs on both sources: abs(0.0) >= abs(-1.0) = false.
Regression test: int16 mod operation uses v_cmp_ge_f32 with abs modifiers.
"""
instructions = [
v_mov_b32_e32(v[0], 0.0),
v_mov_b32_e32(v[1], -1.0),
# abs=0b11 means abs(src0) and abs(src1)
v_cmp_ge_f32_e64(VCC_LO, v[0], v[1], abs=0b11),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "abs(0.0) >= abs(-1.0) should be false")
def test_v_cmp_ge_f32_abs_negative_divisor(self):
"""v_cmp_ge_f32 with abs: remainder check for negative divisor.
Tests the exact comparison used in int16 mod: abs(rem_f) >= abs(div_f).
For 1 % -1: rem_f = 0.0, div_f = -1.0, so abs(0.0) >= abs(-1.0) = false.
"""
instructions = [
v_mov_b32_e32(v[0], 0.0), # remainder as float
v_mov_b32_e32(v[1], -1.0), # divisor as float
v_cmp_ge_f32_e64(VCC_LO, v[0], v[1], abs=0b11),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "abs(0.0) >= abs(-1.0) should be false")
def test_v_cmp_ge_f32_abs_small_remainder(self):
"""v_cmp_ge_f32 with abs: abs(-0.5) >= abs(-3.0) = false."""
instructions = [
v_mov_b32_e32(v[0], -0.5),
v_mov_b32_e32(v[1], -3.0),
v_cmp_ge_f32_e64(VCC_LO, v[0], v[1], abs=0b11),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "abs(-0.5) >= abs(-3.0) should be false")
def test_v_cmp_ge_f32_abs_equal(self):
"""v_cmp_ge_f32 with abs: abs(-1.0) >= abs(1.0) = true."""
instructions = [
v_mov_b32_e32(v[0], -1.0),
v_mov_b32_e32(v[1], 1.0),
v_cmp_ge_f32_e64(VCC_LO, v[0], v[1], abs=0b11),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "abs(-1.0) >= abs(1.0) should be true")
class TestVOP3VOPC64Bit(unittest.TestCase):
"""Tests for VOP3 VOPC with 64-bit operands."""
def test_v_cmp_lt_f64_basic(self):
"""v_cmp_lt_f64: 0.0 < 1.0 = true."""
zero_f64 = f2i64(0.0)
one_f64 = f2i64(1.0)
instructions = [
s_mov_b32(s[0], zero_f64 & 0xffffffff),
s_mov_b32(s[1], zero_f64 >> 32),
s_mov_b32(s[2], one_f64 & 0xffffffff),
s_mov_b32(s[3], one_f64 >> 32),
v_cmp_lt_f64_e64(VCC_LO, s[0:1], s[2:3]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "0.0 < 1.0 should be true")
def test_v_cmp_lt_f64_negative(self):
"""v_cmp_lt_f64: -1.0 < 0.0 = true."""
neg_one_f64 = f2i64(-1.0)
zero_f64 = f2i64(0.0)
instructions = [
s_mov_b32(s[0], neg_one_f64 & 0xffffffff),
s_mov_b32(s[1], neg_one_f64 >> 32),
s_mov_b32(s[2], zero_f64 & 0xffffffff),
s_mov_b32(s[3], zero_f64 >> 32),
v_cmp_lt_f64_e64(VCC_LO, s[0:1], s[2:3]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "-1.0 < 0.0 should be true")
def test_v_cmp_lt_i64_signed(self):
"""v_cmp_lt_i64: 0 < -1 (signed) = false."""
instructions = [
s_mov_b32(s[0], 0),
s_mov_b32(s[1], 0), # s[0:1] = 0
s_mov_b32(s[2], 0xffffffff),
s_mov_b32(s[3], 0xffffffff), # s[2:3] = -1
v_cmp_lt_i64_e64(VCC_LO, s[0:1], s[2:3]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "0 < -1 (signed) should be false")
def test_v_cmp_lt_u64_unsigned(self):
"""v_cmp_lt_u64: 0 < 0xFFFFFFFFFFFFFFFF (unsigned) = true."""
instructions = [
s_mov_b32(s[0], 0),
s_mov_b32(s[1], 0), # s[0:1] = 0
s_mov_b32(s[2], 0xffffffff),
s_mov_b32(s[3], 0xffffffff), # s[2:3] = max uint64
v_cmp_lt_u64_e64(VCC_LO, s[0:1], s[2:3]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "0 < max_uint64 should be true")
class TestVOPCF64(unittest.TestCase):
"""Tests for VOPC (E32 encoding) with 64-bit float operands. Regression test for f64 compare bug."""
def test_v_cmp_lt_f64_e32_true(self):
"""v_cmp_lt_f64_e32: 2.0 < 3.0 = true."""
lo0, hi0 = f2i64(2.0) & 0xffffffff, f2i64(2.0) >> 32
lo1, hi1 = f2i64(3.0) & 0xffffffff, f2i64(3.0) >> 32
instructions = [
s_mov_b32(s[0], lo0), s_mov_b32(s[1], hi0),
s_mov_b32(s[2], lo1), s_mov_b32(s[3], hi1),
v_mov_b32_e32(v[0], s[0]), v_mov_b32_e32(v[1], s[1]),
v_mov_b32_e32(v[2], s[2]), v_mov_b32_e32(v[3], s[3]),
v_cmp_lt_f64_e32(v[0:1], v[2:3]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "2.0 < 3.0 should be true")
def test_v_cmp_lt_f64_e32_false(self):
"""v_cmp_lt_f64_e32: 3.0 < 2.0 = false."""
lo0, hi0 = f2i64(3.0) & 0xffffffff, f2i64(3.0) >> 32
lo1, hi1 = f2i64(2.0) & 0xffffffff, f2i64(2.0) >> 32
instructions = [
s_mov_b32(s[0], lo0), s_mov_b32(s[1], hi0),
s_mov_b32(s[2], lo1), s_mov_b32(s[3], hi1),
v_mov_b32_e32(v[0], s[0]), v_mov_b32_e32(v[1], s[1]),
v_mov_b32_e32(v[2], s[2]), v_mov_b32_e32(v[3], s[3]),
v_cmp_lt_f64_e32(v[0:1], v[2:3]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "3.0 < 2.0 should be false")
def test_v_cmp_nlt_f64_e32_true(self):
"""v_cmp_nlt_f64_e32: !(3.0 < 2.0) = true."""
lo0, hi0 = f2i64(3.0) & 0xffffffff, f2i64(3.0) >> 32
lo1, hi1 = f2i64(2.0) & 0xffffffff, f2i64(2.0) >> 32
instructions = [
s_mov_b32(s[0], lo0), s_mov_b32(s[1], hi0),
s_mov_b32(s[2], lo1), s_mov_b32(s[3], hi1),
v_mov_b32_e32(v[0], s[0]), v_mov_b32_e32(v[1], s[1]),
v_mov_b32_e32(v[2], s[2]), v_mov_b32_e32(v[3], s[3]),
v_cmp_nlt_f64_e32(v[0:1], v[2:3]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "!(3.0 < 2.0) should be true")
def test_v_cmp_nlt_f64_e32_false(self):
"""v_cmp_nlt_f64_e32: !(2.0 < 3.0) = false."""
lo0, hi0 = f2i64(2.0) & 0xffffffff, f2i64(2.0) >> 32
lo1, hi1 = f2i64(3.0) & 0xffffffff, f2i64(3.0) >> 32
instructions = [
s_mov_b32(s[0], lo0), s_mov_b32(s[1], hi0),
s_mov_b32(s[2], lo1), s_mov_b32(s[3], hi1),
v_mov_b32_e32(v[0], s[0]), v_mov_b32_e32(v[1], s[1]),
v_mov_b32_e32(v[2], s[2]), v_mov_b32_e32(v[3], s[3]),
v_cmp_nlt_f64_e32(v[0:1], v[2:3]),
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "!(2.0 < 3.0) should be false")
class TestCmpxExec(unittest.TestCase):
"""Tests for V_CMPX instructions that modify EXEC mask."""
def test_v_cmpx_ngt_f32_e64_all_true(self):
"""V_CMPX_NGT_F32_E64: all lanes pass (literal <= all values)."""
# 131072.0 = 0x48000000
# All values > 131072, so !(131072 > val) = true for all
instructions = [
s_mov_b32(EXEC_LO, 0x7), # 3 lanes active
v_mov_b32_e32(v[0], f2i(200000.0)), # lane 0
v_cmp_eq_u32_e32(1, v[255]),
v_cndmask_b32_e64(v[1], v[0], f2i(300000.0), VCC_LO), # lane 1
v_cmp_eq_u32_e32(2, v[255]),
v_cndmask_b32_e64(v[1], v[1], f2i(400000.0), VCC_LO), # lane 2
# Now v[1] has: lane0=200000, lane1=300000, lane2=400000
# Compare: !(131072.0 > v[1]) i.e., 131072.0 <= v[1]
v_cmpx_ngt_f32_e64(EXEC_LO, f2i(131072.0), v[1]),
]
st = run_program(instructions, n_lanes=3)
# All values > 131072, so all lanes should remain active
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0x7, 0x7, "All 3 lanes should remain active")
def test_v_cmpx_ngt_f32_e64_some_false(self):
"""V_CMPX_NGT_F32_E64: some lanes fail (literal > some values)."""
instructions = [
s_mov_b32(EXEC_LO, 0x7), # 3 lanes active
v_mov_b32_e32(v[0], f2i(100000.0)), # lane 0: 131072 > 100000 = true, so !(true) = false
v_cmp_eq_u32_e32(1, v[255]),
v_cndmask_b32_e64(v[1], v[0], f2i(200000.0), VCC_LO), # lane 1: 131072 > 200000 = false, so !(false) = true
v_cmp_eq_u32_e32(2, v[255]),
v_cndmask_b32_e64(v[1], v[1], f2i(150000.0), VCC_LO), # lane 2: 131072 > 150000 = false, so !(false) = true
v_cmpx_ngt_f32_e64(EXEC_LO, f2i(131072.0), v[1]),
]
st = run_program(instructions, n_lanes=3)
# lane 0: fail (100000 < 131072), lanes 1,2: pass
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0x7, 0x6, "Lanes 1,2 should be active, lane 0 inactive")
def test_v_cmpx_ngt_f32_e64_all_false(self):
"""V_CMPX_NGT_F32_E64: all lanes fail (literal > all values)."""
instructions = [
s_mov_b32(EXEC_LO, 0x7), # 3 lanes active
v_mov_b32_e32(v[0], f2i(100.0)), # all lanes have 100.0
# 131072 > 100 = true, so !(true) = false for all
v_cmpx_ngt_f32_e64(EXEC_LO, f2i(131072.0), v[0]),
]
st = run_program(instructions, n_lanes=3)
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0x7, 0x0, "All lanes should be inactive")
def test_v_cmpx_ngt_f32_e64_large_values(self):
"""V_CMPX_NGT_F32_E64: test with values that trigger Payne-Hanek in sin().
This is a regression test for the sin(859240.0) bug.
Values 859240, 1000000, 100594688 should all pass !(131072 > val).
"""
instructions = [
s_mov_b32(EXEC_LO, 0x7), # 3 lanes active
v_mov_b32_e32(v[0], f2i(859240.0)), # lane 0
v_cmp_eq_u32_e32(1, v[255]),
v_cndmask_b32_e64(v[1], v[0], f2i(1000000.0), VCC_LO), # lane 1
v_cmp_eq_u32_e32(2, v[255]),
v_cndmask_b32_e64(v[1], v[1], f2i(100594688.0), VCC_LO), # lane 2
v_cmpx_ngt_f32_e64(EXEC_LO, f2i(131072.0), v[1]),
]
st = run_program(instructions, n_lanes=3)
# All values > 131072, so !(131072 > val) = true for all
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0x7, 0x7, "All 3 lanes should remain active")
class TestVCCBehavior(unittest.TestCase):
"""Tests for VCC condition code behavior."""
def test_vcc_all_lanes_true(self):
"""VCC should have all bits set when all lanes compare true."""
instructions = [
v_mov_b32_e32(v[0], 5),
v_mov_b32_e32(v[1], 5),
v_cmp_eq_u32_e32(v[0], v[1]),
]
st = run_program(instructions, n_lanes=32)
self.assertEqual(st.vcc, 0xFFFFFFFF, "All 32 lanes should be true")
def test_vcc_lane_dependent(self):
"""VCC should differ per lane based on lane_id comparison."""
instructions = [
v_mov_b32_e32(v[0], 16),
v_cmp_lt_u32_e32(v[255], v[0]), # lanes 0-15 are < 16
]
st = run_program(instructions, n_lanes=32)
self.assertEqual(st.vcc & 0xFFFF, 0xFFFF, "Lanes 0-15 should be true")
self.assertEqual(st.vcc >> 16, 0x0000, "Lanes 16-31 should be false")
class TestCmpNge(unittest.TestCase):
"""Tests for V_CMP_NGE (not-greater-or-equal) with NaN semantics.
NGE = !(a >= b). With NaN inputs:
- If either input is NaN, a >= b is false, so !(false) = true
- This differs from a < b which returns false for NaN inputs
"""
def test_v_cmp_nge_f32_normal_values(self):
"""v_cmp_nge_f32: basic comparison with normal floats."""
instructions = [
v_mov_b32_e32(v[0], f2i(1.0)),
v_mov_b32_e32(v[1], f2i(2.0)),
v_cmp_nge_f32_e32(v[0], v[1]), # !(1.0 >= 2.0) = !(false) = true
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "!(1.0 >= 2.0) should be true")
def test_v_cmp_nge_f32_equal_values(self):
"""v_cmp_nge_f32: equal values should return false."""
instructions = [
v_mov_b32_e32(v[0], f2i(1.0)),
v_mov_b32_e32(v[1], f2i(1.0)),
v_cmp_nge_f32_e32(v[0], v[1]), # !(1.0 >= 1.0) = !(true) = false
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "!(1.0 >= 1.0) should be false")
def test_v_cmp_nge_f32_greater_value(self):
"""v_cmp_nge_f32: greater value should return false."""
instructions = [
v_mov_b32_e32(v[0], f2i(2.0)),
v_mov_b32_e32(v[1], f2i(1.0)),
v_cmp_nge_f32_e32(v[0], v[1]), # !(2.0 >= 1.0) = !(true) = false
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 0, "!(2.0 >= 1.0) should be false")
def test_v_cmp_nge_f32_neg_inf(self):
"""v_cmp_nge_f32: -inf compared to normal value."""
neg_inf = 0xff800000 # -inf
instructions = [
s_mov_b32(s[0], neg_inf),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], f2i(1.0)),
v_cmp_nge_f32_e32(v[0], v[1]), # !(-inf >= 1.0) = !(false) = true
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "!(-inf >= 1.0) should be true")
def test_v_cmp_nge_f32_clears_inactive_vcc_bits(self):
"""v_cmp_nge_f32 with partial EXEC clears inactive VCC bits (hardware behavior)."""
neg_inf = 0xff800000 # -inf
instructions = [
# Set VCC to all 1s first
s_mov_b32(VCC_LO, 0xFFFFFFFF),
# Set EXEC to only lane 0
s_mov_b32(EXEC_LO, 0x00000001),
# v0 = 1.0 for lane 0
v_mov_b32_e32(v[0], f2i(1.0)),
# Compare: !(-inf >= 1.0) = true for lane 0
v_cmp_nge_f32_e32(neg_inf, v[0]),
]
st = run_program(instructions, n_lanes=16)
# Hardware clears inactive lane bits, only active lane results remain
# Lane 0 result = 1 (true), lanes 1-15 = 0 (cleared)
self.assertEqual(st.vcc, 0x00000001, "VCC should only have active lane results")
def test_v_cmp_nge_f32_nan_src0(self):
"""v_cmp_nge_f32: NaN in src0 should return true (NaN >= x is false)."""
quiet_nan = 0x7fc00000
instructions = [
s_mov_b32(s[0], quiet_nan),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], f2i(1.0)),
v_cmp_nge_f32_e32(v[0], v[1]), # !(NaN >= 1.0) = !(false) = true
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "!(NaN >= 1.0) should be true")
def test_v_cmp_nge_f32_nan_src1(self):
"""v_cmp_nge_f32: NaN in src1 should return true (x >= NaN is false)."""
quiet_nan = 0x7fc00000
instructions = [
s_mov_b32(s[0], quiet_nan),
v_mov_b32_e32(v[0], f2i(1.0)),
v_mov_b32_e32(v[1], s[0]),
v_cmp_nge_f32_e32(v[0], v[1]), # !(1.0 >= NaN) = !(false) = true
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "!(1.0 >= NaN) should be true")
def test_v_cmp_nge_f32_both_nan(self):
"""v_cmp_nge_f32: both NaN should return true."""
quiet_nan = 0x7fc00000
instructions = [
s_mov_b32(s[0], quiet_nan),
v_mov_b32_e32(v[0], s[0]),
v_mov_b32_e32(v[1], s[0]),
v_cmp_nge_f32_e32(v[0], v[1]), # !(NaN >= NaN) = !(false) = true
]
st = run_program(instructions, n_lanes=1)
self.assertEqual(st.vcc & 1, 1, "!(NaN >= NaN) should be true")
class TestCmpxPartialWavefront(unittest.TestCase):
"""Tests for V_CMPX with partial wavefronts (fewer than 32 active lanes).
Regression tests for bug where v_cmpx incorrectly set EXEC bits for inactive
lanes when the wavefront had fewer than 32 lanes. This caused garbage data
from uninitialized lanes to corrupt memory writes.
"""
def test_v_cmpx_eq_u32_partial_wave_3_lanes(self):
"""V_CMPX_EQ_U32 with 3 active lanes should only affect those 3 lanes.
With n_lanes=3, initial EXEC=0x7. After v_cmpx comparing lane_id == 1,
only lane 1 should pass, so EXEC should become 0x2 (not have bits 3-31 set).
"""
instructions = [
v_cmpx_eq_u32_e32(1, v[255]), # EXEC = lanes where lane_id == 1
]
st = run_program(instructions, n_lanes=3)
# Only lane 1 should be active (bit 1 set)
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x2,
"Only lane 1 should be active after v_cmpx_eq_u32 with 3 lanes")
def test_v_cmpx_eq_u32_partial_wave_5_lanes(self):
"""V_CMPX_EQ_U32 with 5 active lanes."""
instructions = [
v_cmpx_eq_u32_e32(3, v[255]), # EXEC = lanes where lane_id == 3
]
st = run_program(instructions, n_lanes=5)
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x8,
"Only lane 3 should be active after v_cmpx_eq_u32 with 5 lanes")
def test_v_cmpx_lt_u32_partial_wave(self):
"""V_CMPX_LT_U32 with partial wavefront."""
# VOPC: src0 < vsrc1, so we need v_cmpx_gt_u32 to get lane_id < 2
instructions = [
v_cmpx_gt_u32_e32(2, v[255]), # EXEC = lanes where 2 > lane_id (i.e., lane_id < 2)
]
st = run_program(instructions, n_lanes=4)
# Lanes 0,1 should be active (bits 0,1 set = 0x3)
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x3,
"Only lanes 0,1 should be active after v_cmpx_gt_u32(2, lane_id) with 4 lanes")
def test_v_cmpx_ge_u32_partial_wave(self):
"""V_CMPX_GE_U32 with partial wavefront."""
# VOPC: src0 >= vsrc1, so v_cmpx_le_u32(1, lane_id) gives lane_id >= 2? No.
# v_cmpx_le_u32(src0, vsrc1) = src0 <= vsrc1 = 1 <= lane_id
instructions = [
v_cmpx_le_u32_e32(2, v[255]), # EXEC = lanes where 2 <= lane_id (i.e., lane_id >= 2)
]
st = run_program(instructions, n_lanes=4)
# Lanes 2,3 should be active (bits 2,3 set = 0xC)
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0xC,
"Only lanes 2,3 should be active after v_cmpx_le_u32(2, lane_id) with 4 lanes")
def test_v_cmpx_ne_u32_partial_wave_all_pass(self):
"""V_CMPX_NE_U32 where all active lanes pass."""
instructions = [
v_cmpx_ne_u32_e32(99, v[255]), # EXEC = lanes where lane_id != 99
]
st = run_program(instructions, n_lanes=3)
# All 3 lanes should remain active (bits 0,1,2 set = 0x7)
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x7,
"All 3 lanes should remain active when all pass")
def test_v_cmpx_eq_u32_partial_wave_none_pass(self):
"""V_CMPX_EQ_U32 where no active lanes pass."""
instructions = [
v_cmpx_eq_u32_e32(99, v[255]), # EXEC = lanes where lane_id == 99
]
st = run_program(instructions, n_lanes=3)
# No lanes should be active
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x0,
"No lanes should be active when none pass")
def test_v_cmpx_f32_partial_wave(self):
"""V_CMPX_GT_F32 with partial wavefront - float comparison."""
instructions = [
v_cvt_f32_u32_e32(v[0], v[255]), # v[0] = float(lane_id)
v_mov_b32_e32(v[1], f2i(0.5)), # v[1] = 0.5
v_cmpx_gt_f32_e32(v[0], v[1]), # EXEC = lanes where v[0] > 0.5
]
st = run_program(instructions, n_lanes=4)
# Lanes 1,2,3 have values > 0.5, lane 0 has 0.0
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0xE,
"Lanes 1,2,3 should be active (float > 0.5)")
def test_v_cmpx_e64_partial_wave(self):
"""V_CMPX_EQ_U32_E64 (VOP3 encoding) with partial wavefront."""
instructions = [
v_cmpx_eq_u32_e64(EXEC_LO, v[255], 2), # EXEC = lanes where lane_id == 2
]
st = run_program(instructions, n_lanes=4)
self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x4,
"Only lane 2 should be active after v_cmpx_eq_u32_e64")
if __name__ == '__main__':
unittest.main()