"""Tests for VOPC instructions - vector compare operations.

Includes: v_cmp_class_f32, v_cmp_class_f16, v_cmp_eq_*, v_cmp_lt_*, v_cmp_gt_*

Every test follows the same shape: build a short list of instructions, execute
it with ``run_program`` on ``n_lanes`` lanes, then inspect the resulting state
(``st.vcc``, ``st.sgpr[...]`` or ``st.vgpr[lane][reg]``).  Tests that need a
per-lane value compare against ``v[255]``, which these programs treat as the
lane id.
"""
import unittest

# The instruction builders (s_mov_b32, v_cmp_*, ...), the register files (s, v),
# VCC_LO / EXEC_LO / SrcEnum, the f2i / f2i64 bit-cast helpers and run_program
# are all expected to come from this star import.
from test.amd.hw.helpers import *

VCC = 106  # SGPR index for VCC_LO


class TestCmpClass(unittest.TestCase):
  """Tests for V_CMP_CLASS_F32 float classification."""

  def test_cmp_class_quiet_nan(self):
    """V_CMP_CLASS_F32 detects quiet NaN."""
    quiet_nan = 0x7fc00000
    instructions = [
      s_mov_b32(s[0], quiet_nan),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0b0000000010),  # bit 1 = quiet NaN
      v_cmp_class_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect quiet NaN")

  def test_cmp_class_signaling_nan(self):
    """V_CMP_CLASS_F32 detects signaling NaN."""
    signal_nan = 0x7f800001
    instructions = [
      s_mov_b32(s[0], signal_nan),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0b0000000001),  # bit 0 = signaling NaN
      v_cmp_class_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect signaling NaN")

  def test_cmp_class_positive_inf(self):
    """V_CMP_CLASS_F32 detects +inf."""
    pos_inf = 0x7f800000
    instructions = [
      s_mov_b32(s[0], pos_inf),
      s_mov_b32(s[1], 0b1000000000),  # bit 9 = +inf
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[1]),
      v_cmp_class_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect +inf")

  def test_cmp_class_negative_inf(self):
    """V_CMP_CLASS_F32 detects -inf."""
    neg_inf = 0xff800000
    instructions = [
      s_mov_b32(s[0], neg_inf),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0b0000000100),  # bit 2 = -inf
      v_cmp_class_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect -inf")

  def test_cmp_class_normal_positive(self):
    """V_CMP_CLASS_F32 detects positive normal."""
    instructions = [
      v_mov_b32_e32(v[0], 1.0),
      s_mov_b32(s[1], 0b0100000000),  # bit 8 = positive normal
      v_mov_b32_e32(v[1], s[1]),
      v_cmp_class_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect positive normal")

  def test_cmp_class_normal_negative(self):
    """V_CMP_CLASS_F32 detects negative normal."""
    instructions = [
      v_mov_b32_e32(v[0], -1.0),
      v_mov_b32_e32(v[1], 0b0000001000),  # bit 3 = negative normal
      v_cmp_class_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect negative normal")

  def test_cmp_class_quiet_nan_not_signaling(self):
    """Quiet NaN does not match signaling NaN mask."""
    quiet_nan = 0x7fc00000
    instructions = [
      s_mov_b32(s[0], quiet_nan),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0b0000000001),  # bit 0 = signaling NaN only
      v_cmp_class_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "Quiet NaN should not match signaling mask")

  def test_cmp_class_signaling_nan_not_quiet(self):
    """Signaling NaN does not match quiet NaN mask."""
    signal_nan = 0x7f800001
    instructions = [
      s_mov_b32(s[0], signal_nan),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0b0000000010),  # bit 1 = quiet NaN only
      v_cmp_class_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "Signaling NaN should not match quiet mask")

  def test_v_cmp_lg_f32_nan(self):
    """v_cmp_lg_f32 is ordered not-equal (<>): NaN <> x should be False per IEEE 754."""
    quiet_nan = 0x7fc00000
    one_f32 = 0x3f800000  # 1.0f
    instructions = [
      s_mov_b32(s[0], quiet_nan),
      v_mov_b32_e32(v[0], s[0]),
      s_mov_b32(s[1], one_f32),
      v_mov_b32_e32(v[1], s[1]),
      v_cmp_lg_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "v_cmp_lg_f32(NaN, 1.0) should be 0")

  def test_v_cmp_neq_f32_nan(self):
    """v_cmp_neq_f32 is unordered not-equal (!=): NaN != x should be True per IEEE 754."""
    quiet_nan = 0x7fc00000
    one_f32 = 0x3f800000  # 1.0f
    instructions = [
      s_mov_b32(s[0], quiet_nan),
      v_mov_b32_e32(v[0], s[0]),
      s_mov_b32(s[1], one_f32),
      v_mov_b32_e32(v[1], s[1]),
      v_cmp_neq_f32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "v_cmp_neq_f32(NaN, 1.0) should be 1")

  def test_v_cmp_sets_vcc_bits(self):
    """V_CMP_EQ sets VCC bits based on per-lane comparison."""
    instructions = [
      s_mov_b32(s[0], 5),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[0]),
      v_cmp_eq_u32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=4)
    self.assertEqual(st.vcc & 0xf, 0xf, "All lanes should match")


class TestCmpClassF16(unittest.TestCase):
  """Tests for V_CMP_CLASS_F16 float classification.

  Class bit mapping:
    bit 0 = signaling NaN
    bit 1 = quiet NaN
    bit 2 = -infinity
    bit 3 = -normal
    bit 4 = -denormal
    bit 5 = -zero
    bit 6 = +zero
    bit 7 = +denormal
    bit 8 = +normal
    bit 9 = +infinity
  """

  def test_cmp_class_f16_positive_zero(self):
    """V_CMP_CLASS_F16: +zero matches bit 6."""
    instructions = [
      v_mov_b32_e32(v[0], 0x0000),  # f16 +0.0
      v_mov_b32_e32(v[1], 0x40),  # bit 6 = +zero
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect positive zero")

  def test_cmp_class_f16_negative_zero(self):
    """V_CMP_CLASS_F16: -zero matches bit 5."""
    instructions = [
      s_mov_b32(s[0], 0x8000),  # f16 -0.0
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0x20),  # bit 5 = -zero
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect negative zero")

  def test_cmp_class_f16_positive_normal(self):
    """V_CMP_CLASS_F16: +1.0 (normal) matches bit 8."""
    instructions = [
      s_mov_b32(s[0], 0x3c00),  # f16 +1.0
      s_mov_b32(s[1], 0x100),  # bit 8 = +normal
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[1]),
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect positive normal")

  def test_cmp_class_f16_negative_normal(self):
    """V_CMP_CLASS_F16: -1.0 (normal) matches bit 3."""
    instructions = [
      s_mov_b32(s[0], 0xbc00),  # f16 -1.0
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0x08),  # bit 3 = -normal
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect negative normal")

  def test_cmp_class_f16_positive_infinity(self):
    """V_CMP_CLASS_F16: +inf matches bit 9."""
    instructions = [
      s_mov_b32(s[0], 0x7c00),  # f16 +inf
      s_mov_b32(s[1], 0x200),  # bit 9 = +inf
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[1]),
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect positive infinity")

  def test_cmp_class_f16_negative_infinity(self):
    """V_CMP_CLASS_F16: -inf matches bit 2."""
    instructions = [
      s_mov_b32(s[0], 0xfc00),  # f16 -inf
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0x04),  # bit 2 = -inf
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect negative infinity")

  def test_cmp_class_f16_quiet_nan(self):
    """V_CMP_CLASS_F16: quiet NaN matches bit 1."""
    instructions = [
      s_mov_b32(s[0], 0x7e00),  # f16 quiet NaN
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0x02),  # bit 1 = quiet NaN
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect quiet NaN")

  def test_cmp_class_f16_signaling_nan(self):
    """V_CMP_CLASS_F16: signaling NaN matches bit 0."""
    instructions = [
      s_mov_b32(s[0], 0x7c01),  # f16 signaling NaN
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0x01),  # bit 0 = signaling NaN
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect signaling NaN")

  def test_cmp_class_f16_positive_denormal(self):
    """V_CMP_CLASS_F16: positive denormal matches bit 7."""
    instructions = [
      v_mov_b32_e32(v[0], 1),  # f16 +denormal (0x0001)
      v_mov_b32_e32(v[1], 0x80),  # bit 7 = +denormal
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect positive denormal")

  def test_cmp_class_f16_negative_denormal(self):
    """V_CMP_CLASS_F16: negative denormal matches bit 4."""
    instructions = [
      s_mov_b32(s[0], 0x8001),  # f16 -denormal
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], 0x10),  # bit 4 = -denormal
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Should detect negative denormal")

  def test_cmp_class_f16_combined_mask_zeros(self):
    """V_CMP_CLASS_F16: mask 0x60 covers both +zero and -zero."""
    instructions = [
      v_mov_b32_e32(v[0], 0),  # f16 +0.0
      v_mov_b32_e32(v[1], 0x60),  # bits 5 and 6 (+-zero)
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with mask 0x60")

  def test_cmp_class_f16_combined_mask_1f8(self):
    """V_CMP_CLASS_F16: mask 0x1f8 covers -normal,-denorm,-zero,+zero,+denorm,+normal.

    This is the exact mask used in the f16 sin kernel at PC=46.
    """
    instructions = [
      v_mov_b32_e32(v[0], 0),  # f16 +0.0
      s_mov_b32(s[0], 0x1f8),
      v_mov_b32_e32(v[1], s[0]),  # mask 0x1f8
      v_cmp_class_f16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with mask 0x1f8")

  def test_cmp_class_f16_vop3_encoding(self):
    """V_CMP_CLASS_F16 in VOP3 encoding (v_cmp_class_f16_e64)."""
    instructions = [
      v_mov_b32_e32(v[0], 0),  # f16 +0.0
      s_mov_b32(s[0], 0x1f8),  # class mask
      v_cmp_class_f16_e64(VCC_LO, v[0], s[0]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with VOP3 encoding")

  def test_cmp_class_f16_vop3_normal_positive(self):
    """V_CMP_CLASS_F16 VOP3 encoding with +1.0 (normal)."""
    instructions = [
      s_mov_b32(s[0], 0x3c00),  # f16 +1.0
      v_mov_b32_e32(v[0], s[0]),
      s_mov_b32(s[1], 0x1f8),  # class mask
      v_cmp_class_f16_e64(VCC_LO, v[0], s[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +1.0 (normal) with mask 0x1f8")

  def test_cmp_class_f16_vop3_nan_fails_mask(self):
    """V_CMP_CLASS_F16 VOP3: NaN should NOT match mask 0x1f8 (no NaN bits set)."""
    instructions = [
      s_mov_b32(s[0], 0x7e00),  # f16 quiet NaN
      v_mov_b32_e32(v[0], s[0]),
      s_mov_b32(s[1], 0x1f8),  # class mask
      v_cmp_class_f16_e64(VCC_LO, v[0], s[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "VCC should be 0 for NaN with mask 0x1f8 (no NaN bits)")

  def test_cmp_class_f16_vop3_inf_fails_mask(self):
    """V_CMP_CLASS_F16 VOP3: +inf should NOT match mask 0x1f8 (no inf bits set)."""
    instructions = [
      s_mov_b32(s[0], 0x7c00),  # f16 +inf
      v_mov_b32_e32(v[0], s[0]),
      s_mov_b32(s[1], 0x1f8),  # class mask
      v_cmp_class_f16_e64(VCC_LO, v[0], s[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "VCC should be 0 for +inf with mask 0x1f8 (no inf bits)")


class TestCmpInt(unittest.TestCase):
  """Tests for integer comparison operations."""

  def test_v_cmp_eq_u32(self):
    """V_CMP_EQ_U32 sets VCC bits based on per-lane comparison."""
    instructions = [
      s_mov_b32(s[0], 5),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[0]),
      v_cmp_eq_u32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=4)
    self.assertEqual(st.vcc & 0xf, 0xf, "All lanes should match")

  def test_v_cmp_ne_u32_with_zero(self):
    """V_CMP_NE_U32: compare with zero, used for int->bool cast."""
    instructions = [
      v_mov_b32_e32(v[1], 0),
      v_cmp_eq_u32_e32(1, v[255]),  # vcc = (lane == 1)
      v_cndmask_b32_e64(v[1], v[1], 1, VCC_LO),  # v1[lane1] = 1
      v_cmp_ne_u32_e32(0, v[1]),  # vcc = (0 != v1)
      v_cndmask_b32_e64(v[0], 0, 1, VCC_LO),  # v0 = vcc ? 1 : 0
    ]
    st = run_program(instructions, n_lanes=2)
    self.assertEqual(st.vgpr[0][0], 0, "lane 0: 0 != 0 should be false")
    self.assertEqual(st.vgpr[1][0], 1, "lane 1: 0 != 1 should be true")
    self.assertEqual(st.vcc & 0x3, 0x2, "VCC should be 0b10")

  def test_v_cmp_ne_u32_all_nonzero(self):
    """V_CMP_NE_U32: all lanes have nonzero values."""
    instructions = [
      v_mov_b32_e32(v[1], 5),
      v_cmp_ne_u32_e32(0, v[1]),
    ]
    st = run_program(instructions, n_lanes=4)
    self.assertEqual(st.vcc & 0xf, 0xf, "All lanes should be != 0")

  def test_cmp_eq_u16_opsel_lo_lo(self):
    """V_CMP_EQ_U16 (e32, no opsel) compares the lo halves; the hi halves differ."""
    instructions = [
      s_mov_b32(s[0], 0x12340005),  # lo=5, hi=0x1234
      s_mov_b32(s[1], 0xABCD0005),  # lo=5, hi=0xABCD
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[1]),
      v_cmp_eq_u16_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Lo halves should be equal")

  def test_cmp_eq_u16_opsel_hi_hi(self):
    """V_CMP_EQ_U16 comparing hi halves with VOP3 opsel."""
    instructions = [
      s_mov_b32(s[2], 0x00051234),  # hi=5, lo=0x1234
      v_mov_b32_e32(v[0], s[2]),
      s_mov_b32(s[2], 0x0005ABCD),  # hi=5, lo=0xABCD
      v_mov_b32_e32(v[1], s[2]),
      v_cmp_eq_u16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.sgpr[0] & 1, 1, "Hi halves should be equal: 5==5")

  def test_cmp_eq_u16_opsel_hi_hi_equal(self):
    """V_CMP_EQ_U16 VOP3 with opsel=3 compares hi halves (equal case)."""
    instructions = [
      s_mov_b32(s[2], 0x12340005),  # lo=5, hi=0x1234
      v_mov_b32_e32(v[0], s[2]),
      s_mov_b32(s[2], 0x12340009),  # lo=9, hi=0x1234
      v_mov_b32_e32(v[1], s[2]),
      v_cmp_eq_u16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.sgpr[0] & 1, 1, "hi==hi should be true: 0x1234==0x1234")

  def test_cmp_gt_u16_opsel_hi(self):
    """V_CMP_GT_U16 VOP3 with opsel=3 compares hi halves."""
    instructions = [
      s_mov_b32(s[2], 0x99990005),  # lo=5, hi=0x9999
      v_mov_b32_e32(v[0], s[2]),
      s_mov_b32(s[2], 0x12340005),  # lo=5, hi=0x1234
      v_mov_b32_e32(v[1], s[2]),
      v_cmp_gt_u16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.sgpr[0] & 1, 1, "hi>hi should be true: 0x9999>0x1234")


class TestCmpFloat(unittest.TestCase):
  """Tests for float comparison operations."""

  def test_v_cmp_lt_f16_vsrc1_hi(self):
    """V_CMP_LT_F16 with both operands from high half using VOP3 opsel."""
    instructions = [
      s_mov_b32(s[2], 0x3c000000),  # hi=1.0 (f16), lo=0
      v_mov_b32_e32(v[0], s[2]),
      s_mov_b32(s[2], 0x40000000),  # hi=2.0 (f16), lo=0
      v_mov_b32_e32(v[1], s[2]),
      v_cmp_lt_f16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.sgpr[0] & 1, 1, "1.0 < 2.0 should be true")

  def test_v_cmp_gt_f16_vsrc1_hi(self):
    """V_CMP_GT_F16 with both operands from high half using VOP3 opsel."""
    instructions = [
      s_mov_b32(s[2], 0x40000000),  # hi=2.0 (f16), lo=0
      v_mov_b32_e32(v[0], s[2]),
      s_mov_b32(s[2], 0x3c000000),  # hi=1.0 (f16), lo=0
      v_mov_b32_e32(v[1], s[2]),
      v_cmp_gt_f16_e64(vdst=s[0], src0=v[0], src1=v[1], opsel=3),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.sgpr[0] & 1, 1, "2.0 > 1.0 should be true")

  def test_v_cmp_eq_f16_vsrc1_hi_equal(self):
    """v_cmp_eq_f16 with equal low and high halves."""
    instructions = [
      s_mov_b32(s[0], 0x42004200),  # hi=3.0 (0x4200), lo=3.0 (0x4200)
      v_mov_b32_e32(v[0], s[0]),
      v_cmp_eq_f16_e32(v[0], v[0].h),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Expected vcc=1 (3.0 == 3.0)")

  def test_v_cmp_neq_f16_vsrc1_hi(self):
    """v_cmp_lg_f16 (ordered not-equal) with different low and high halves.

    The test name says "neq", but the instruction issued is v_cmp_lg_f16_e32.
    """
    instructions = [
      s_mov_b32(s[0], 0x40003c00),  # hi=2.0 (0x4000), lo=1.0 (0x3c00)
      v_mov_b32_e32(v[0], s[0]),
      v_cmp_lg_f16_e32(v[0], v[0].h),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "Expected vcc=1 (1.0 != 2.0)")

  def test_v_cmp_nge_f16_inf_self(self):
    """v_cmp_nge_f16 comparing -inf with itself (unordered less than).

    Regression test: -inf < -inf should be false (IEEE 754).
    """
    instructions = [
      s_mov_b32(s[0], 0xFC00FC00),  # both halves = -inf (0xFC00)
      v_mov_b32_e32(v[0], s[0]),
      v_cmp_nge_f16_e32(v[0], v[0].h),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "Expected vcc=0 (-inf >= -inf)")

  def test_v_cmp_f16_multilane(self):
    """v_cmp_lt_f16 with vsrc1=v128 across multiple lanes."""
    instructions = [
      # Lane 0: v0 = 0x40003c00 (hi=2.0, lo=1.0) -> 1.0 < 2.0 = true
      # Lane 1: v0 = 0x3c004000 (hi=1.0, lo=2.0) -> 2.0 < 1.0 = false
      v_mov_b32_e32(v[0], 0x40003c00),  # default
      v_cmp_eq_u32_e32(1, v[255]),  # vcc = (lane == 1)
      v_cndmask_b32_e64(v[0], v[0], 0x3c004000, SrcEnum.VCC_LO),
      v_cmp_lt_f16_e32(v[0], v[0].h),
    ]
    st = run_program(instructions, n_lanes=2)
    self.assertEqual(st.vcc & 1, 1, "Lane 0: expected vcc=1 (1.0 < 2.0)")
    self.assertEqual((st.vcc >> 1) & 1, 0, "Lane 1: expected vcc=0 (2.0 < 1.0)")


class TestVOP3VOPCModifiers(unittest.TestCase):
  """Tests for VOP3 VOPC source modifiers (the cases here all use abs on both sources)."""

  def test_v_cmp_ge_f32_abs_both(self):
    """v_cmp_ge_f32 with abs on both sources: abs(0.0) >= abs(-1.0) = false.

    Regression test: int16 mod operation uses v_cmp_ge_f32 with abs modifiers.
    """
    instructions = [
      v_mov_b32_e32(v[0], 0.0),
      v_mov_b32_e32(v[1], -1.0),
      # abs=0b11 means abs(src0) and abs(src1)
      v_cmp_ge_f32_e64(VCC_LO, v[0], v[1], abs=0b11),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "abs(0.0) >= abs(-1.0) should be false")

  def test_v_cmp_ge_f32_abs_negative_divisor(self):
    """v_cmp_ge_f32 with abs: remainder check for negative divisor.

    Tests the exact comparison used in int16 mod: abs(rem_f) >= abs(div_f).
    For 1 % -1: rem_f = 0.0, div_f = -1.0, so abs(0.0) >= abs(-1.0) = false.
    """
    instructions = [
      v_mov_b32_e32(v[0], 0.0),  # remainder as float
      v_mov_b32_e32(v[1], -1.0),  # divisor as float
      v_cmp_ge_f32_e64(VCC_LO, v[0], v[1], abs=0b11),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "abs(0.0) >= abs(-1.0) should be false")

  def test_v_cmp_ge_f32_abs_small_remainder(self):
    """v_cmp_ge_f32 with abs: abs(-0.5) >= abs(-3.0) = false."""
    instructions = [
      v_mov_b32_e32(v[0], -0.5),
      v_mov_b32_e32(v[1], -3.0),
      v_cmp_ge_f32_e64(VCC_LO, v[0], v[1], abs=0b11),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "abs(-0.5) >= abs(-3.0) should be false")

  def test_v_cmp_ge_f32_abs_equal(self):
    """v_cmp_ge_f32 with abs: abs(-1.0) >= abs(1.0) = true."""
    instructions = [
      v_mov_b32_e32(v[0], -1.0),
      v_mov_b32_e32(v[1], 1.0),
      v_cmp_ge_f32_e64(VCC_LO, v[0], v[1], abs=0b11),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "abs(-1.0) >= abs(1.0) should be true")


class TestVOP3VOPC64Bit(unittest.TestCase):
  """Tests for VOP3 VOPC with 64-bit operands."""

  def test_v_cmp_lt_f64_basic(self):
    """v_cmp_lt_f64: 0.0 < 1.0 = true."""
    zero_f64 = f2i64(0.0)
    one_f64 = f2i64(1.0)
    instructions = [
      s_mov_b32(s[0], zero_f64 & 0xffffffff),
      s_mov_b32(s[1], zero_f64 >> 32),
      s_mov_b32(s[2], one_f64 & 0xffffffff),
      s_mov_b32(s[3], one_f64 >> 32),
      v_cmp_lt_f64_e64(VCC_LO, s[0:1], s[2:3]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "0.0 < 1.0 should be true")

  def test_v_cmp_lt_f64_negative(self):
    """v_cmp_lt_f64: -1.0 < 0.0 = true."""
    neg_one_f64 = f2i64(-1.0)
    zero_f64 = f2i64(0.0)
    instructions = [
      s_mov_b32(s[0], neg_one_f64 & 0xffffffff),
      s_mov_b32(s[1], neg_one_f64 >> 32),
      s_mov_b32(s[2], zero_f64 & 0xffffffff),
      s_mov_b32(s[3], zero_f64 >> 32),
      v_cmp_lt_f64_e64(VCC_LO, s[0:1], s[2:3]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "-1.0 < 0.0 should be true")

  def test_v_cmp_lt_i64_signed(self):
    """v_cmp_lt_i64: 0 < -1 (signed) = false."""
    instructions = [
      s_mov_b32(s[0], 0),
      s_mov_b32(s[1], 0),  # s[0:1] = 0
      s_mov_b32(s[2], 0xffffffff),
      s_mov_b32(s[3], 0xffffffff),  # s[2:3] = -1
      v_cmp_lt_i64_e64(VCC_LO, s[0:1], s[2:3]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "0 < -1 (signed) should be false")

  def test_v_cmp_lt_u64_unsigned(self):
    """v_cmp_lt_u64: 0 < 0xFFFFFFFFFFFFFFFF (unsigned) = true."""
    instructions = [
      s_mov_b32(s[0], 0),
      s_mov_b32(s[1], 0),  # s[0:1] = 0
      s_mov_b32(s[2], 0xffffffff),
      s_mov_b32(s[3], 0xffffffff),  # s[2:3] = max uint64
      v_cmp_lt_u64_e64(VCC_LO, s[0:1], s[2:3]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "0 < max_uint64 should be true")


class TestVOPCF64(unittest.TestCase):
  """Tests for VOPC (E32 encoding) with 64-bit float operands.

  Regression test for f64 compare bug.
  """

  def test_v_cmp_lt_f64_e32_true(self):
    """v_cmp_lt_f64_e32: 2.0 < 3.0 = true."""
    lo0, hi0 = f2i64(2.0) & 0xffffffff, f2i64(2.0) >> 32
    lo1, hi1 = f2i64(3.0) & 0xffffffff, f2i64(3.0) >> 32
    instructions = [
      s_mov_b32(s[0], lo0),
      s_mov_b32(s[1], hi0),
      s_mov_b32(s[2], lo1),
      s_mov_b32(s[3], hi1),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[1]),
      v_mov_b32_e32(v[2], s[2]),
      v_mov_b32_e32(v[3], s[3]),
      v_cmp_lt_f64_e32(v[0:1], v[2:3]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "2.0 < 3.0 should be true")

  def test_v_cmp_lt_f64_e32_false(self):
    """v_cmp_lt_f64_e32: 3.0 < 2.0 = false."""
    lo0, hi0 = f2i64(3.0) & 0xffffffff, f2i64(3.0) >> 32
    lo1, hi1 = f2i64(2.0) & 0xffffffff, f2i64(2.0) >> 32
    instructions = [
      s_mov_b32(s[0], lo0),
      s_mov_b32(s[1], hi0),
      s_mov_b32(s[2], lo1),
      s_mov_b32(s[3], hi1),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[1]),
      v_mov_b32_e32(v[2], s[2]),
      v_mov_b32_e32(v[3], s[3]),
      v_cmp_lt_f64_e32(v[0:1], v[2:3]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "3.0 < 2.0 should be false")

  def test_v_cmp_nlt_f64_e32_true(self):
    """v_cmp_nlt_f64_e32: !(3.0 < 2.0) = true."""
    lo0, hi0 = f2i64(3.0) & 0xffffffff, f2i64(3.0) >> 32
    lo1, hi1 = f2i64(2.0) & 0xffffffff, f2i64(2.0) >> 32
    instructions = [
      s_mov_b32(s[0], lo0),
      s_mov_b32(s[1], hi0),
      s_mov_b32(s[2], lo1),
      s_mov_b32(s[3], hi1),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[1]),
      v_mov_b32_e32(v[2], s[2]),
      v_mov_b32_e32(v[3], s[3]),
      v_cmp_nlt_f64_e32(v[0:1], v[2:3]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "!(3.0 < 2.0) should be true")

  def test_v_cmp_nlt_f64_e32_false(self):
    """v_cmp_nlt_f64_e32: !(2.0 < 3.0) = false."""
    lo0, hi0 = f2i64(2.0) & 0xffffffff, f2i64(2.0) >> 32
    lo1, hi1 = f2i64(3.0) & 0xffffffff, f2i64(3.0) >> 32
    instructions = [
      s_mov_b32(s[0], lo0),
      s_mov_b32(s[1], hi0),
      s_mov_b32(s[2], lo1),
      s_mov_b32(s[3], hi1),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[1]),
      v_mov_b32_e32(v[2], s[2]),
      v_mov_b32_e32(v[3], s[3]),
      v_cmp_nlt_f64_e32(v[0:1], v[2:3]),
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "!(2.0 < 3.0) should be false")


class TestCmpxExec(unittest.TestCase):
  """Tests for V_CMPX instructions that modify EXEC mask."""

  def test_v_cmpx_ngt_f32_e64_all_true(self):
    """V_CMPX_NGT_F32_E64: all lanes pass (literal <= all values)."""
    # 131072.0 = 0x48000000
    # All values > 131072, so !(131072 > val) = true for all
    instructions = [
      s_mov_b32(EXEC_LO, 0x7),  # 3 lanes active
      v_mov_b32_e32(v[0], f2i(200000.0)),  # lane 0
      v_cmp_eq_u32_e32(1, v[255]),
      v_cndmask_b32_e64(v[1], v[0], f2i(300000.0), VCC_LO),  # lane 1
      v_cmp_eq_u32_e32(2, v[255]),
      v_cndmask_b32_e64(v[1], v[1], f2i(400000.0), VCC_LO),  # lane 2
      # Now v[1] has: lane0=200000, lane1=300000, lane2=400000
      # Compare: !(131072.0 > v[1]) i.e., 131072.0 <= v[1]
      v_cmpx_ngt_f32_e64(EXEC_LO, f2i(131072.0), v[1]),
    ]
    st = run_program(instructions, n_lanes=3)
    # All values > 131072, so all lanes should remain active
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0x7, 0x7, "All 3 lanes should remain active")

  def test_v_cmpx_ngt_f32_e64_some_false(self):
    """V_CMPX_NGT_F32_E64: some lanes fail (literal > some values)."""
    instructions = [
      s_mov_b32(EXEC_LO, 0x7),  # 3 lanes active
      v_mov_b32_e32(v[0], f2i(100000.0)),  # lane 0: 131072 > 100000 = true, so !(true) = false
      v_cmp_eq_u32_e32(1, v[255]),
      v_cndmask_b32_e64(v[1], v[0], f2i(200000.0), VCC_LO),  # lane 1: 131072 > 200000 = false, so !(false) = true
      v_cmp_eq_u32_e32(2, v[255]),
      v_cndmask_b32_e64(v[1], v[1], f2i(150000.0), VCC_LO),  # lane 2: 131072 > 150000 = false, so !(false) = true
      v_cmpx_ngt_f32_e64(EXEC_LO, f2i(131072.0), v[1]),
    ]
    st = run_program(instructions, n_lanes=3)
    # lane 0: fail (100000 < 131072), lanes 1,2: pass
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0x7, 0x6, "Lanes 1,2 should be active, lane 0 inactive")

  def test_v_cmpx_ngt_f32_e64_all_false(self):
    """V_CMPX_NGT_F32_E64: all lanes fail (literal > all values)."""
    instructions = [
      s_mov_b32(EXEC_LO, 0x7),  # 3 lanes active
      v_mov_b32_e32(v[0], f2i(100.0)),  # all lanes have 100.0
      # 131072 > 100 = true, so !(true) = false for all
      v_cmpx_ngt_f32_e64(EXEC_LO, f2i(131072.0), v[0]),
    ]
    st = run_program(instructions, n_lanes=3)
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0x7, 0x0, "All lanes should be inactive")

  def test_v_cmpx_ngt_f32_e64_large_values(self):
    """V_CMPX_NGT_F32_E64: test with values that trigger Payne-Hanek in sin().

    This is a regression test for the sin(859240.0) bug.
    Values 859240, 1000000, 100594688 should all pass !(131072 > val).
    """
    instructions = [
      s_mov_b32(EXEC_LO, 0x7),  # 3 lanes active
      v_mov_b32_e32(v[0], f2i(859240.0)),  # lane 0
      v_cmp_eq_u32_e32(1, v[255]),
      v_cndmask_b32_e64(v[1], v[0], f2i(1000000.0), VCC_LO),  # lane 1
      v_cmp_eq_u32_e32(2, v[255]),
      v_cndmask_b32_e64(v[1], v[1], f2i(100594688.0), VCC_LO),  # lane 2
      v_cmpx_ngt_f32_e64(EXEC_LO, f2i(131072.0), v[1]),
    ]
    st = run_program(instructions, n_lanes=3)
    # All values > 131072, so !(131072 > val) = true for all
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0x7, 0x7, "All 3 lanes should remain active")


class TestVCCBehavior(unittest.TestCase):
  """Tests for VCC condition code behavior."""

  def test_vcc_all_lanes_true(self):
    """VCC should have all bits set when all lanes compare true."""
    instructions = [
      v_mov_b32_e32(v[0], 5),
      v_mov_b32_e32(v[1], 5),
      v_cmp_eq_u32_e32(v[0], v[1]),
    ]
    st = run_program(instructions, n_lanes=32)
    self.assertEqual(st.vcc, 0xFFFFFFFF, "All 32 lanes should be true")

  def test_vcc_lane_dependent(self):
    """VCC should differ per lane based on lane_id comparison."""
    instructions = [
      v_mov_b32_e32(v[0], 16),
      v_cmp_lt_u32_e32(v[255], v[0]),  # lanes 0-15 are < 16
    ]
    st = run_program(instructions, n_lanes=32)
    self.assertEqual(st.vcc & 0xFFFF, 0xFFFF, "Lanes 0-15 should be true")
    self.assertEqual(st.vcc >> 16, 0x0000, "Lanes 16-31 should be false")


class TestCmpNge(unittest.TestCase):
  """Tests for V_CMP_NGE (not-greater-or-equal) with NaN semantics.

  NGE = !(a >= b). With NaN inputs:
  - If either input is NaN, a >= b is false, so !(false) = true
  - This differs from a < b which returns false for NaN inputs
  """

  def test_v_cmp_nge_f32_normal_values(self):
    """v_cmp_nge_f32: basic comparison with normal floats."""
    instructions = [
      v_mov_b32_e32(v[0], f2i(1.0)),
      v_mov_b32_e32(v[1], f2i(2.0)),
      v_cmp_nge_f32_e32(v[0], v[1]),  # !(1.0 >= 2.0) = !(false) = true
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "!(1.0 >= 2.0) should be true")

  def test_v_cmp_nge_f32_equal_values(self):
    """v_cmp_nge_f32: equal values should return false."""
    instructions = [
      v_mov_b32_e32(v[0], f2i(1.0)),
      v_mov_b32_e32(v[1], f2i(1.0)),
      v_cmp_nge_f32_e32(v[0], v[1]),  # !(1.0 >= 1.0) = !(true) = false
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "!(1.0 >= 1.0) should be false")

  def test_v_cmp_nge_f32_greater_value(self):
    """v_cmp_nge_f32: greater value should return false."""
    instructions = [
      v_mov_b32_e32(v[0], f2i(2.0)),
      v_mov_b32_e32(v[1], f2i(1.0)),
      v_cmp_nge_f32_e32(v[0], v[1]),  # !(2.0 >= 1.0) = !(true) = false
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 0, "!(2.0 >= 1.0) should be false")

  def test_v_cmp_nge_f32_neg_inf(self):
    """v_cmp_nge_f32: -inf compared to normal value."""
    neg_inf = 0xff800000  # -inf
    instructions = [
      s_mov_b32(s[0], neg_inf),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], f2i(1.0)),
      v_cmp_nge_f32_e32(v[0], v[1]),  # !(-inf >= 1.0) = !(false) = true
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "!(-inf >= 1.0) should be true")

  def test_v_cmp_nge_f32_clears_inactive_vcc_bits(self):
    """v_cmp_nge_f32 with partial EXEC clears inactive VCC bits (hardware behavior)."""
    neg_inf = 0xff800000  # -inf
    instructions = [
      # Set VCC to all 1s first
      s_mov_b32(VCC_LO, 0xFFFFFFFF),
      # Set EXEC to only lane 0
      s_mov_b32(EXEC_LO, 0x00000001),
      # v0 = 1.0 for lane 0
      v_mov_b32_e32(v[0], f2i(1.0)),
      # Compare: !(-inf >= 1.0) = true for lane 0
      v_cmp_nge_f32_e32(neg_inf, v[0]),
    ]
    st = run_program(instructions, n_lanes=16)
    # Hardware clears inactive lane bits, only active lane results remain
    # Lane 0 result = 1 (true), lanes 1-15 = 0 (cleared)
    self.assertEqual(st.vcc, 0x00000001, "VCC should only have active lane results")

  def test_v_cmp_nge_f32_nan_src0(self):
    """v_cmp_nge_f32: NaN in src0 should return true (NaN >= x is false)."""
    quiet_nan = 0x7fc00000
    instructions = [
      s_mov_b32(s[0], quiet_nan),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], f2i(1.0)),
      v_cmp_nge_f32_e32(v[0], v[1]),  # !(NaN >= 1.0) = !(false) = true
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "!(NaN >= 1.0) should be true")

  def test_v_cmp_nge_f32_nan_src1(self):
    """v_cmp_nge_f32: NaN in src1 should return true (x >= NaN is false)."""
    quiet_nan = 0x7fc00000
    instructions = [
      s_mov_b32(s[0], quiet_nan),
      v_mov_b32_e32(v[0], f2i(1.0)),
      v_mov_b32_e32(v[1], s[0]),
      v_cmp_nge_f32_e32(v[0], v[1]),  # !(1.0 >= NaN) = !(false) = true
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "!(1.0 >= NaN) should be true")

  def test_v_cmp_nge_f32_both_nan(self):
    """v_cmp_nge_f32: both NaN should return true."""
    quiet_nan = 0x7fc00000
    instructions = [
      s_mov_b32(s[0], quiet_nan),
      v_mov_b32_e32(v[0], s[0]),
      v_mov_b32_e32(v[1], s[0]),
      v_cmp_nge_f32_e32(v[0], v[1]),  # !(NaN >= NaN) = !(false) = true
    ]
    st = run_program(instructions, n_lanes=1)
    self.assertEqual(st.vcc & 1, 1, "!(NaN >= NaN) should be true")


class TestCmpxPartialWavefront(unittest.TestCase):
  """Tests for V_CMPX with partial wavefronts (fewer than 32 active lanes).

  Regression tests for bug where v_cmpx incorrectly set EXEC bits for inactive
  lanes when the wavefront had fewer than 32 lanes. This caused garbage data from
  uninitialized lanes to corrupt memory writes.
  """

  def test_v_cmpx_eq_u32_partial_wave_3_lanes(self):
    """V_CMPX_EQ_U32 with 3 active lanes should only affect those 3 lanes.

    With n_lanes=3, initial EXEC=0x7. After v_cmpx comparing lane_id == 1,
    only lane 1 should pass, so EXEC should become 0x2 (not have bits 3-31 set).
    """
    instructions = [
      v_cmpx_eq_u32_e32(1, v[255]),  # EXEC = lanes where lane_id == 1
    ]
    st = run_program(instructions, n_lanes=3)
    # Only lane 1 should be active (bit 1 set)
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x2, "Only lane 1 should be active after v_cmpx_eq_u32 with 3 lanes")

  def test_v_cmpx_eq_u32_partial_wave_5_lanes(self):
    """V_CMPX_EQ_U32 with 5 active lanes."""
    instructions = [
      v_cmpx_eq_u32_e32(3, v[255]),  # EXEC = lanes where lane_id == 3
    ]
    st = run_program(instructions, n_lanes=5)
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x8, "Only lane 3 should be active after v_cmpx_eq_u32 with 5 lanes")

  def test_v_cmpx_lt_u32_partial_wave(self):
    """V_CMPX_LT_U32 with partial wavefront."""
    # VOPC evaluates src0 OP vsrc1 and the constant is passed as src0, so
    # "lane_id < 2" is expressed with swapped operands as v_cmpx_gt_u32(2, lane_id).
    instructions = [
      v_cmpx_gt_u32_e32(2, v[255]),  # EXEC = lanes where 2 > lane_id (i.e., lane_id < 2)
    ]
    st = run_program(instructions, n_lanes=4)
    # Lanes 0,1 should be active (bits 0,1 set = 0x3)
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x3, "Only lanes 0,1 should be active after v_cmpx_gt_u32(2, lane_id) with 4 lanes")

  def test_v_cmpx_ge_u32_partial_wave(self):
    """V_CMPX_GE_U32 with partial wavefront."""
    # VOPC evaluates src0 OP vsrc1 and the constant is passed as src0, so
    # "lane_id >= 2" is expressed with swapped operands as v_cmpx_le_u32(2, lane_id).
    instructions = [
      v_cmpx_le_u32_e32(2, v[255]),  # EXEC = lanes where 2 <= lane_id (i.e., lane_id >= 2)
    ]
    st = run_program(instructions, n_lanes=4)
    # Lanes 2,3 should be active (bits 2,3 set = 0xC)
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0xC, "Only lanes 2,3 should be active after v_cmpx_le_u32(2, lane_id) with 4 lanes")

  def test_v_cmpx_ne_u32_partial_wave_all_pass(self):
    """V_CMPX_NE_U32 where all active lanes pass."""
    instructions = [
      v_cmpx_ne_u32_e32(99, v[255]),  # EXEC = lanes where lane_id != 99
    ]
    st = run_program(instructions, n_lanes=3)
    # All 3 lanes should remain active (bits 0,1,2 set = 0x7)
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x7, "All 3 lanes should remain active when all pass")

  def test_v_cmpx_eq_u32_partial_wave_none_pass(self):
    """V_CMPX_EQ_U32 where no active lanes pass."""
    instructions = [
      v_cmpx_eq_u32_e32(99, v[255]),  # EXEC = lanes where lane_id == 99
    ]
    st = run_program(instructions, n_lanes=3)
    # No lanes should be active
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x0, "No lanes should be active when none pass")

  def test_v_cmpx_f32_partial_wave(self):
    """V_CMPX_GT_F32 with partial wavefront - float comparison."""
    instructions = [
      v_cvt_f32_u32_e32(v[0], v[255]),  # v[0] = float(lane_id)
      v_mov_b32_e32(v[1], f2i(0.5)),  # v[1] = 0.5
      v_cmpx_gt_f32_e32(v[0], v[1]),  # EXEC = lanes where v[0] > 0.5
    ]
    st = run_program(instructions, n_lanes=4)
    # Lanes 1,2,3 have values > 0.5, lane 0 has 0.0
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0xE, "Lanes 1,2,3 should be active (float > 0.5)")

  def test_v_cmpx_e64_partial_wave(self):
    """V_CMPX_EQ_U32_E64 (VOP3 encoding) with partial wavefront."""
    instructions = [
      v_cmpx_eq_u32_e64(EXEC_LO, v[255], 2),  # EXEC = lanes where lane_id == 2
    ]
    st = run_program(instructions, n_lanes=4)
    self.assertEqual(st.sgpr[EXEC_LO.offset] & 0xFFFFFFFF, 0x4, "Only lane 2 should be active after v_cmpx_eq_u32_e64")


if __name__ == '__main__':
  unittest.main()