"""Tests for VOP1 instructions - single operand vector operations. Includes: v_mov_b32, v_cvt_*, v_sin_f32, v_rcp_f32, v_exp_f32, v_rndne_f32, v_floor_f32, v_trunc_f32, v_fract_f32, v_clz_i32_u32, v_ctz_i32_b32, v_readfirstlane_b32 """ import unittest from test.amd.hw.helpers import * class TestMov(unittest.TestCase): """Tests for V_MOV_B32.""" def test_v_mov_b32(self): """V_MOV_B32 moves a value.""" instructions = [ s_mov_b32(s[0], 42), v_mov_b32_e32(v[0], s[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][0], 42) def test_v_mov_all_lanes(self): """V_MOV_B32 sets all lanes to the same value.""" instructions = [ s_mov_b32(s[0], 42), v_mov_b32_e32(v[0], s[0]), ] st = run_program(instructions, n_lanes=4) for lane in range(4): self.assertEqual(st.vgpr[lane][0], 42) def test_v_mov_b16_to_hi(self): """V_MOV_B16 can write to high 16 bits with .h suffix.""" instructions = [ s_mov_b32(s[0], 0x0000DEAD), # lo=0xDEAD, hi=0 v_mov_b32_e32(v[0], s[0]), v_mov_b16_e32(v[0].h, 0x5678), # Move 0x5678 to high half ] st = run_program(instructions, n_lanes=1) result_hi = (st.vgpr[0][0] >> 16) & 0xFFFF result_lo = st.vgpr[0][0] & 0xFFFF self.assertEqual(result_hi, 0x5678, f"Expected hi=0x5678, got 0x{result_hi:04x}") self.assertEqual(result_lo, 0xDEAD, f"Expected lo=0xDEAD (preserved), got 0x{result_lo:04x}") def test_v_mov_b16_to_lo(self): """V_MOV_B16 writes to low 16 bits by default.""" instructions = [ s_mov_b32(s[0], 0xBEEF0000), # hi=0xBEEF, lo=0 v_mov_b32_e32(v[0], s[0]), v_mov_b16_e32(v[0], 0x1234), # Move to low half ] st = run_program(instructions, n_lanes=1) result_hi = (st.vgpr[0][0] >> 16) & 0xFFFF result_lo = st.vgpr[0][0] & 0xFFFF self.assertEqual(result_lo, 0x1234, f"Expected lo=0x1234, got 0x{result_lo:04x}") self.assertEqual(result_hi, 0xBEEF, f"Expected hi=0xBEEF (preserved), got 0x{result_hi:04x}") class TestTrigonometry(unittest.TestCase): """Tests for trigonometric instructions.""" def test_v_sin_f32_small(self): """V_SIN_F32 computes sin for small values.""" import math instructions = [ v_mov_b32_e32(v[0], 1.0), v_sin_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) expected = math.sin(1.0 * 2 * math.pi) self.assertAlmostEqual(result, expected, places=4) def test_v_sin_f32_quarter(self): """V_SIN_F32 at 0.25 cycles = sin(pi/2) = 1.0.""" instructions = [ s_mov_b32(s[0], f2i(0.25)), v_mov_b32_e32(v[0], s[0]), v_sin_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertAlmostEqual(result, 1.0, places=4) def test_v_sin_f32_large(self): """V_SIN_F32 for large input value (132000.0).""" import math instructions = [ s_mov_b32(s[0], f2i(132000.0)), v_mov_b32_e32(v[0], s[0]), v_sin_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) expected = math.sin(132000.0 * 2 * math.pi) self.assertAlmostEqual(result, expected, places=2, msg=f"sin(132000) got {result}, expected ~{expected}") class TestRounding(unittest.TestCase): """Tests for rounding instructions.""" def test_v_rndne_f32_half_even(self): """V_RNDNE_F32 rounds to nearest even.""" instructions = [ s_mov_b32(s[0], f2i(2.5)), v_mov_b32_e32(v[0], s[0]), v_rndne_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 2.0, places=5) def test_v_rndne_f32_half_odd(self): """V_RNDNE_F32 rounds 3.5 to 4 (nearest even).""" instructions = [ s_mov_b32(s[0], f2i(3.5)), v_mov_b32_e32(v[0], s[0]), v_rndne_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 4.0, places=5) def test_v_rndne_f32_large(self): """V_RNDNE_F32 with large value (like sin reduction uses).""" val = 100000.0 * 0.15915494309189535 instructions = [ s_mov_b32(s[0], f2i(val)), v_mov_b32_e32(v[0], s[0]), v_rndne_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) expected = round(val) self.assertAlmostEqual(i2f(st.vgpr[0][1]), expected, places=0) def test_v_floor_f32(self): """V_FLOOR_F32 floors to integer.""" instructions = [ s_mov_b32(s[0], f2i(3.7)), v_mov_b32_e32(v[0], s[0]), v_floor_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 3.0, places=5) def test_v_trunc_f32(self): """V_TRUNC_F32 truncates toward zero.""" instructions = [ s_mov_b32(s[0], f2i(-3.7)), v_mov_b32_e32(v[0], s[0]), v_trunc_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), -3.0, places=5) def test_v_fract_f32(self): """V_FRACT_F32 returns fractional part.""" instructions = [ s_mov_b32(s[0], f2i(3.75)), v_mov_b32_e32(v[0], s[0]), v_fract_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 0.75, places=5) def test_v_fract_f32_large(self): """V_FRACT_F32 with large value - precision matters here.""" instructions = [ s_mov_b32(s[0], f2i(132000.25)), v_mov_b32_e32(v[0], s[0]), v_fract_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertGreaterEqual(result, 0.0) self.assertLess(result, 1.0) class TestConversion(unittest.TestCase): """Tests for conversion instructions.""" def test_v_cvt_i32_f32_positive(self): """V_CVT_I32_F32 converts float to signed int.""" instructions = [ s_mov_b32(s[0], f2i(42.7)), v_mov_b32_e32(v[0], s[0]), v_cvt_i32_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 42) def test_v_cvt_i32_f32_negative(self): """V_CVT_I32_F32 converts negative float to signed int.""" instructions = [ s_mov_b32(s[0], f2i(-42.7)), v_mov_b32_e32(v[0], s[0]), v_cvt_i32_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1] & 0xffffffff, (-42) & 0xffffffff) def test_v_cvt_i32_f32_large(self): """V_CVT_I32_F32 with large float (used in sin for quadrant).""" instructions = [ s_mov_b32(s[0], f2i(15915.0)), v_mov_b32_e32(v[0], s[0]), v_cvt_i32_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 15915) def test_v_cvt_f32_i32(self): """V_CVT_F32_I32 converts signed int to float.""" instructions = [ s_mov_b32(s[0], 42), v_mov_b32_e32(v[0], s[0]), v_cvt_f32_i32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 42.0, places=5) def test_v_cvt_f32_u32(self): """V_CVT_F32_U32 converts unsigned int to float.""" instructions = [ s_mov_b32(s[0], 0xffffffff), v_mov_b32_e32(v[0], s[0]), v_cvt_f32_u32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 4294967296.0, places=-5) class TestF16Conversions(unittest.TestCase): """Tests for f16 conversion instructions.""" def test_v_cvt_f16_f32_basic(self): """V_CVT_F16_F32 converts f32 to f16 in low 16 bits.""" instructions = [ v_mov_b32_e32(v[0], 1.0), v_cvt_f16_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] lo_bits = result & 0xffff self.assertEqual(lo_bits, 0x3c00, f"Expected 0x3c00, got 0x{lo_bits:04x}") def test_v_cvt_f16_f32_negative(self): """V_CVT_F16_F32 converts negative f32 to f16.""" instructions = [ v_mov_b32_e32(v[0], -2.0), v_cvt_f16_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] lo_bits = result & 0xffff self.assertEqual(lo_bits, 0xc000, f"Expected 0xc000, got 0x{lo_bits:04x}") def test_v_cvt_f16_f32_small(self): """V_CVT_F16_F32 converts small f32 value.""" instructions = [ v_mov_b32_e32(v[0], 0.5), v_cvt_f16_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] lo_bits = result & 0xffff expected = f32_to_f16(0.5) self.assertEqual(lo_bits, expected, f"Expected 0x{expected:04x}, got 0x{lo_bits:04x}") def test_v_cvt_f16_f32_preserves_high_bits(self): """V_CVT_F16_F32 preserves high 16 bits of destination.""" instructions = [ s_mov_b32(s[0], 0xdead0000), v_mov_b32_e32(v[1], s[0]), v_mov_b32_e32(v[0], 1.0), v_cvt_f16_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] hi_bits = (result >> 16) & 0xffff lo_bits = result & 0xffff self.assertEqual(lo_bits, 0x3c00, f"Low bits should be 0x3c00, got 0x{lo_bits:04x}") self.assertEqual(hi_bits, 0xdead, f"High bits should be preserved as 0xdead, got 0x{hi_bits:04x}") def test_v_cvt_f16_f32_same_src_dst_preserves_high_bits(self): """V_CVT_F16_F32 with same src/dst preserves high bits of source.""" instructions = [ v_mov_b32_e32(v[0], 1.0), v_cvt_f16_f32_e32(v[0], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][0] self.assertEqual(result, 0x3f803c00, f"Expected 0x3f803c00, got 0x{result:08x}") def test_v_cvt_f16_f32_reads_full_32bit_source(self): """V_CVT_F16_F32 must read full 32-bit f32 source.""" instructions = [ s_mov_b32(s[0], 0x3fc00000), # f32 1.5 v_mov_b32_e32(v[0], s[0]), v_cvt_f16_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] lo_bits = result & 0xffff self.assertEqual(lo_bits, 0x3e00, f"Expected f16(1.5)=0x3e00, got 0x{lo_bits:04x} ({f16(lo_bits)})") def test_v_cvt_i16_f16_zero(self): """V_CVT_I16_F16 converts f16 zero to i16 zero.""" instructions = [ v_mov_b32_e32(v[0], 0), v_cvt_i16_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertEqual(result, 0, f"Expected 0, got {result}") def test_v_cvt_i16_f16_one(self): """V_CVT_I16_F16 converts f16 1.0 to i16 1.""" instructions = [ s_mov_b32(s[0], 0x3c00), # f16 1.0 in low bits v_mov_b32_e32(v[0], s[0]), v_cvt_i16_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertEqual(result, 1, f"Expected 1, got {result}") def test_v_cvt_i16_f16_negative(self): """V_CVT_I16_F16 converts f16 -2.0 to i16 -2.""" instructions = [ s_mov_b32(s[0], 0xc000), # f16 -2.0 in low bits v_mov_b32_e32(v[0], s[0]), v_cvt_i16_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertEqual(result, (-2) & 0xffff, f"Expected 0xfffe (-2), got 0x{result:04x}") def test_v_cvt_i16_f16_from_hi(self): """V_CVT_I16_F16 can read from high 16 bits with opsel.""" instructions = [ s_mov_b32(s[0], 0x3c000000), # f16 1.0 in HIGH bits, 0 in low v_mov_b32_e32(v[0], s[0]), VOP3(VOP3Op.V_CVT_I16_F16, vdst=v[1], src0=v[0], opsel=0b0001), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertEqual(result, 1, f"Expected 1 from high bits, got {result}") class TestF64Conversions(unittest.TestCase): """Tests for f64 conversion instructions. Regression tests for f32_to_f64/f64_to_f32.""" def test_v_cvt_f64_f32_one(self): """V_CVT_F64_F32 converts f32 1.0 to f64.""" instructions = [ s_mov_b32(s[0], f2i(1.0)), v_mov_b32_e32(v[0], s[0]), v_cvt_f64_f32_e32(v[2:3], v[0]), ] st = run_program(instructions, n_lanes=1) result = i642f((st.vgpr[0][3] << 32) | st.vgpr[0][2]) self.assertAlmostEqual(result, 1.0, places=10) def test_v_cvt_f64_f32_negative(self): """V_CVT_F64_F32 converts f32 -2.5 to f64.""" instructions = [ s_mov_b32(s[0], f2i(-2.5)), v_mov_b32_e32(v[0], s[0]), v_cvt_f64_f32_e32(v[2:3], v[0]), ] st = run_program(instructions, n_lanes=1) result = i642f((st.vgpr[0][3] << 32) | st.vgpr[0][2]) self.assertAlmostEqual(result, -2.5, places=10) def test_v_cvt_f64_f32_pi(self): """V_CVT_F64_F32 converts f32 pi to f64.""" instructions = [ s_mov_b32(s[0], f2i(3.14159265)), v_mov_b32_e32(v[0], s[0]), v_cvt_f64_f32_e32(v[2:3], v[0]), ] st = run_program(instructions, n_lanes=1) result = i642f((st.vgpr[0][3] << 32) | st.vgpr[0][2]) self.assertAlmostEqual(result, 3.14159265, places=5) def test_v_cvt_f64_f32_zero(self): """V_CVT_F64_F32 converts f32 0.0 to f64.""" instructions = [ v_mov_b32_e32(v[0], 0), v_cvt_f64_f32_e32(v[2:3], v[0]), ] st = run_program(instructions, n_lanes=1) result = i642f((st.vgpr[0][3] << 32) | st.vgpr[0][2]) self.assertEqual(result, 0.0) def test_v_cvt_f32_f64_one(self): """V_CVT_F32_F64 converts f64 1.0 to f32.""" f64_bits = f2i64(1.0) lo, hi = f64_bits & 0xFFFFFFFF, (f64_bits >> 32) & 0xFFFFFFFF instructions = [ s_mov_b32(s[0], lo), s_mov_b32(s[1], hi), v_mov_b32_e32(v[0], s[0]), v_mov_b32_e32(v[1], s[1]), v_cvt_f32_f64_e32(v[2], v[0:1]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][2]) self.assertAlmostEqual(result, 1.0, places=5) def test_v_cvt_f32_f64_negative(self): """V_CVT_F32_F64 converts f64 -3.5 to f32.""" f64_bits = f2i64(-3.5) lo, hi = f64_bits & 0xFFFFFFFF, (f64_bits >> 32) & 0xFFFFFFFF instructions = [ s_mov_b32(s[0], lo), s_mov_b32(s[1], hi), v_mov_b32_e32(v[0], s[0]), v_mov_b32_e32(v[1], s[1]), v_cvt_f32_f64_e32(v[2], v[0:1]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][2]) self.assertAlmostEqual(result, -3.5, places=5) def test_v_cvt_f32_f64_large(self): """V_CVT_F32_F64 converts large f64 to f32.""" f64_bits = f2i64(123456.789) lo, hi = f64_bits & 0xFFFFFFFF, (f64_bits >> 32) & 0xFFFFFFFF instructions = [ s_mov_b32(s[0], lo), s_mov_b32(s[1], hi), v_mov_b32_e32(v[0], s[0]), v_mov_b32_e32(v[1], s[1]), v_cvt_f32_f64_e32(v[2], v[0:1]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][2]) self.assertAlmostEqual(result, 123456.789, places=0) def test_v_cvt_f64_i32_positive(self): """V_CVT_F64_I32 converts positive i32 to f64.""" instructions = [ s_mov_b32(s[0], 42), v_mov_b32_e32(v[0], s[0]), v_cvt_f64_i32_e32(v[2:3], v[0]), ] st = run_program(instructions, n_lanes=1) result = i642f((st.vgpr[0][3] << 32) | st.vgpr[0][2]) self.assertAlmostEqual(result, 42.0, places=10) def test_v_cvt_f64_i32_negative(self): """V_CVT_F64_I32 converts negative i32 to f64.""" instructions = [ s_mov_b32(s[0], 0xFFFFFFFF), # -1 as i32 v_mov_b32_e32(v[0], s[0]), v_cvt_f64_i32_e32(v[2:3], v[0]), ] st = run_program(instructions, n_lanes=1) result = i642f((st.vgpr[0][3] << 32) | st.vgpr[0][2]) self.assertAlmostEqual(result, -1.0, places=10) def test_v_cvt_f64_u32_large(self): """V_CVT_F64_U32 converts large u32 to f64.""" instructions = [ s_mov_b32(s[0], 0xFFFFFFFF), # max u32 v_mov_b32_e32(v[0], s[0]), v_cvt_f64_u32_e32(v[2:3], v[0]), ] st = run_program(instructions, n_lanes=1) result = i642f((st.vgpr[0][3] << 32) | st.vgpr[0][2]) self.assertAlmostEqual(result, 4294967295.0, places=0) def test_v_cvt_f64_u32_zero(self): """V_CVT_F64_U32 converts 0 to f64.""" instructions = [ v_mov_b32_e32(v[0], 0), v_cvt_f64_u32_e32(v[2:3], v[0]), ] st = run_program(instructions, n_lanes=1) result = i642f((st.vgpr[0][3] << 32) | st.vgpr[0][2]) self.assertEqual(result, 0.0) class TestClz(unittest.TestCase): """Tests for V_CLZ_I32_U32 - count leading zeros.""" def test_v_clz_i32_u32_zero(self): """V_CLZ_I32_U32 of 0 returns -1 (all bits are 0).""" instructions = [ v_mov_b32_e32(v[0], 0), v_clz_i32_u32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0xFFFFFFFF) def test_v_clz_i32_u32_one(self): """V_CLZ_I32_U32 of 1 returns 31 (31 leading zeros).""" instructions = [ v_mov_b32_e32(v[0], 1), v_clz_i32_u32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 31) def test_v_clz_i32_u32_msb_set(self): """V_CLZ_I32_U32 of 0x80000000 returns 0 (no leading zeros).""" instructions = [ s_mov_b32(s[0], 0x80000000), v_mov_b32_e32(v[0], s[0]), v_clz_i32_u32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0) def test_v_clz_i32_u32_half(self): """V_CLZ_I32_U32 of 0x8000 (bit 15) returns 16.""" instructions = [ s_mov_b32(s[0], 0x8000), v_mov_b32_e32(v[0], s[0]), v_clz_i32_u32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 16) def test_v_clz_i32_u32_all_ones(self): """V_CLZ_I32_U32 of 0xFFFFFFFF returns 0.""" instructions = [ s_mov_b32(s[0], 0xFFFFFFFF), v_mov_b32_e32(v[0], s[0]), v_clz_i32_u32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0) class TestCtz(unittest.TestCase): """Tests for V_CTZ_I32_B32 - count trailing zeros.""" def test_v_ctz_i32_b32_zero(self): """V_CTZ_I32_B32 of 0 returns -1 (all bits are 0).""" instructions = [ v_mov_b32_e32(v[0], 0), v_ctz_i32_b32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0xFFFFFFFF) def test_v_ctz_i32_b32_one(self): """V_CTZ_I32_B32 of 1 returns 0 (no trailing zeros).""" instructions = [ v_mov_b32_e32(v[0], 1), v_ctz_i32_b32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0) def test_v_ctz_i32_b32_msb_set(self): """V_CTZ_I32_B32 of 0x80000000 returns 31.""" instructions = [ s_mov_b32(s[0], 0x80000000), v_mov_b32_e32(v[0], s[0]), v_ctz_i32_b32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 31) def test_v_ctz_i32_b32_half(self): """V_CTZ_I32_B32 of 0x8000 (bit 15) returns 15.""" instructions = [ s_mov_b32(s[0], 0x8000), v_mov_b32_e32(v[0], s[0]), v_ctz_i32_b32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 15) def test_v_ctz_i32_b32_all_ones(self): """V_CTZ_I32_B32 of 0xFFFFFFFF returns 0.""" instructions = [ s_mov_b32(s[0], 0xFFFFFFFF), v_mov_b32_e32(v[0], s[0]), v_ctz_i32_b32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0) class TestRcp(unittest.TestCase): """Tests for V_RCP_F32 - reciprocal.""" def test_v_rcp_f32_normal(self): """V_RCP_F32 of 2.0 returns 0.5.""" instructions = [ v_mov_b32_e32(v[0], 2.0), v_rcp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 0.5, places=5) def test_v_rcp_f32_inf(self): """V_RCP_F32 of +inf returns 0.""" instructions = [ s_mov_b32(s[0], 0x7f800000), v_mov_b32_e32(v[0], s[0]), v_rcp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(i2f(st.vgpr[0][1]), 0.0) def test_v_rcp_f32_neg_inf(self): """V_RCP_F32 of -inf returns -0.""" instructions = [ s_mov_b32(s[0], 0xff800000), v_mov_b32_e32(v[0], s[0]), v_rcp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertEqual(result, 0.0) self.assertEqual(st.vgpr[0][1], 0x80000000) def test_v_rcp_f32_zero(self): """V_RCP_F32 of 0 returns +inf.""" import math instructions = [ v_mov_b32_e32(v[0], 0), v_rcp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isinf(i2f(st.vgpr[0][1]))) class TestExp(unittest.TestCase): """Tests for V_EXP_F32 - base-2 exponential.""" def test_v_exp_f32_large_negative(self): """V_EXP_F32 of large negative value (2^-100) returns very small number.""" instructions = [ s_mov_b32(s[0], f2i(-100.0)), v_mov_b32_e32(v[0], s[0]), v_exp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertLess(result, 1e-20) def test_v_exp_f32_large_positive(self): """V_EXP_F32 of large positive value (2^100) returns very large number.""" instructions = [ s_mov_b32(s[0], f2i(100.0)), v_mov_b32_e32(v[0], s[0]), v_exp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertGreater(result, 1e20) class TestReadFirstLane(unittest.TestCase): """Tests for V_READFIRSTLANE_B32.""" def _readfirstlane(self, sdst, vsrc): """Helper to create V_READFIRSTLANE_B32 with SGPR destination.""" return v_readfirstlane_b32_e32(sdst, vsrc) def test_v_readfirstlane_b32_basic(self): """V_READFIRSTLANE_B32 reads from the first active lane.""" instructions = [ v_lshlrev_b32_e32(v[0], 2, v[255]), v_add_nc_u32_e32(v[0], 1000, v[0]), self._readfirstlane(s[0], v[0]), v_mov_b32_e32(v[1], s[0]), ] st = run_program(instructions, n_lanes=4) for lane in range(4): self.assertEqual(st.vgpr[lane][1], 1000) def test_v_readfirstlane_b32_different_vgpr(self): """V_READFIRSTLANE_B32 reading from different VGPR index.""" instructions = [ v_lshlrev_b32_e32(v[7], 5, v[255]), v_add_nc_u32_e32(v[7], 200, v[7]), self._readfirstlane(s[0], v[7]), v_mov_b32_e32(v[8], s[0]), ] st = run_program(instructions, n_lanes=4) for lane in range(4): self.assertEqual(st.vgpr[lane][8], 200) class TestCvtF16Modifiers(unittest.TestCase): """Tests for V_CVT_F32_F16 with VOP3 abs/neg modifiers.""" def test_v_cvt_f32_f16_abs_negative(self): """V_CVT_F32_F16 with |abs| on negative value.""" f16_neg1 = f32_to_f16(-1.0) # 0xbc00 instructions = [ s_mov_b32(s[0], f16_neg1), v_mov_b32_e32(v[1], s[0]), v_cvt_f32_f16_e64(v[0], abs(v[1])), # |(-1.0)| = 1.0 ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][0]) self.assertAlmostEqual(result, 1.0, places=5) def test_v_cvt_f32_f16_abs_positive(self): """V_CVT_F32_F16 with |abs| on positive value (should stay positive).""" f16_2 = f32_to_f16(2.0) # 0x4000 instructions = [ s_mov_b32(s[0], f16_2), v_mov_b32_e32(v[1], s[0]), v_cvt_f32_f16_e64(v[0], abs(v[1])), # |2.0| = 2.0 ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][0]) self.assertAlmostEqual(result, 2.0, places=5) def test_v_cvt_f32_f16_neg_positive(self): """V_CVT_F32_F16 with neg on positive value.""" f16_2 = f32_to_f16(2.0) # 0x4000 instructions = [ s_mov_b32(s[0], f16_2), v_mov_b32_e32(v[1], s[0]), v_cvt_f32_f16_e64(v[0], -v[1]), # -(2.0) = -2.0 ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][0]) self.assertAlmostEqual(result, -2.0, places=5) def test_v_cvt_f32_f16_neg_negative(self): """V_CVT_F32_F16 with neg on negative value (double negative).""" f16_neg2 = f32_to_f16(-2.0) # 0xc000 instructions = [ s_mov_b32(s[0], f16_neg2), v_mov_b32_e32(v[1], s[0]), v_cvt_f32_f16_e64(v[0], -v[1]), # -(-2.0) = 2.0 ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][0]) self.assertAlmostEqual(result, 2.0, places=5) def test_v_cvt_f16_f32_then_pack_for_wmma(self): """CVT F32->F16 followed by pack (common WMMA pattern).""" f32_val = 3.5 instructions = [ s_mov_b32(s[0], f2i(f32_val)), v_mov_b32_e32(v[0], s[0]), v_cvt_f16_f32_e32(v[1], v[0]), v_pack_b32_f16(v[2], v[1], v[1]), # Pack same value ] st = run_program(instructions, n_lanes=1) lo = f16(st.vgpr[0][2] & 0xffff) hi = f16((st.vgpr[0][2] >> 16) & 0xffff) self.assertAlmostEqual(lo, f32_val, places=1) self.assertAlmostEqual(hi, f32_val, places=1) class TestConversionRounding(unittest.TestCase): """Tests for conversion rounding behavior.""" def test_cvt_f32_to_i32_round_toward_zero(self): """F32 to I32 should truncate (round toward zero).""" instructions = [ v_mov_b32_e32(v[0], 2.9), v_mov_b32_e32(v[1], -2.9), v_cvt_i32_f32_e32(v[2], v[0]), v_cvt_i32_f32_e32(v[3], v[1]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][2], 2, "2.9 -> 2") self.assertEqual(st.vgpr[0][3] & 0xFFFFFFFF, 0xFFFFFFFE, "-2.9 -> -2") def test_cvt_f32_to_u32_negative(self): """F32 to U32 with negative input should clamp to 0.""" instructions = [ v_mov_b32_e32(v[0], -1.0), v_cvt_u32_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0) def test_rndne_f32_half_even(self): """V_RNDNE_F32 should round to nearest even.""" instructions = [ v_mov_b32_e32(v[0], 2.5), v_mov_b32_e32(v[1], 3.5), v_mov_b32_e32(v[2], 4.5), v_rndne_f32_e32(v[3], v[0]), v_rndne_f32_e32(v[4], v[1]), v_rndne_f32_e32(v[5], v[2]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][3]), 2.0, places=5) # 2.5 -> 2 (even) self.assertAlmostEqual(i2f(st.vgpr[0][4]), 4.0, places=5) # 3.5 -> 4 (even) self.assertAlmostEqual(i2f(st.vgpr[0][5]), 4.0, places=5) # 4.5 -> 4 (even) def test_f16_to_f32_precision(self): """F16 to F32 conversion precision.""" f16_val = f32_to_f16(1.5) instructions = [ s_mov_b32(s[0], f16_val), v_mov_b32_e32(v[0], s[0]), v_cvt_f32_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 1.5, places=5) def test_f16_denormal_to_f32(self): """F16 denormal converts to small positive f32.""" f16_denorm = 0x0001 # Smallest positive f16 denormal instructions = [ v_mov_b32_e32(v[0], f16_denorm), v_cvt_f32_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertGreater(result, 0) self.assertLess(result, 1e-6) class TestSqrt(unittest.TestCase): """Tests for V_SQRT_F32 - square root.""" def test_v_sqrt_f32_normal(self): """V_SQRT_F32 of 4.0 returns 2.0.""" instructions = [ v_mov_b32_e32(v[0], 4.0), v_sqrt_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 2.0, places=5) def test_v_sqrt_f32_one(self): """V_SQRT_F32 of 1.0 returns 1.0.""" instructions = [ v_mov_b32_e32(v[0], 1.0), v_sqrt_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 1.0, places=5) def test_v_sqrt_f32_zero(self): """V_SQRT_F32 of 0.0 returns 0.0.""" instructions = [ v_mov_b32_e32(v[0], 0), v_sqrt_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(i2f(st.vgpr[0][1]), 0.0) def test_v_sqrt_f32_neg_zero(self): """V_SQRT_F32 of -0.0 returns -0.0.""" instructions = [ s_mov_b32(s[0], 0x80000000), # -0.0 v_mov_b32_e32(v[0], s[0]), v_sqrt_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0x80000000) # -0.0 def test_v_sqrt_f32_inf(self): """V_SQRT_F32 of +inf returns +inf.""" import math instructions = [ s_mov_b32(s[0], 0x7f800000), # +inf v_mov_b32_e32(v[0], s[0]), v_sqrt_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isinf(i2f(st.vgpr[0][1]))) self.assertGreater(i2f(st.vgpr[0][1]), 0) def test_v_sqrt_f32_negative(self): """V_SQRT_F32 of negative value returns NaN.""" import math instructions = [ v_mov_b32_e32(v[0], -1.0), v_sqrt_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isnan(i2f(st.vgpr[0][1]))) def test_v_sqrt_f32_nan(self): """V_SQRT_F32 of NaN returns NaN.""" import math instructions = [ s_mov_b32(s[0], 0x7fc00000), # quiet NaN v_mov_b32_e32(v[0], s[0]), v_sqrt_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isnan(i2f(st.vgpr[0][1]))) def test_v_sqrt_f32_small(self): """V_SQRT_F32 of small value (0.25) returns 0.5.""" instructions = [ v_mov_b32_e32(v[0], 0.25), v_sqrt_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 0.5, places=5) class TestRsq(unittest.TestCase): """Tests for V_RSQ_F32 - reciprocal square root (1/sqrt(x)).""" def test_v_rsq_f32_normal(self): """V_RSQ_F32 of 4.0 returns 0.5.""" instructions = [ v_mov_b32_e32(v[0], 4.0), v_rsq_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 0.5, places=5) def test_v_rsq_f32_one(self): """V_RSQ_F32 of 1.0 returns 1.0.""" instructions = [ v_mov_b32_e32(v[0], 1.0), v_rsq_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 1.0, places=5) def test_v_rsq_f32_zero(self): """V_RSQ_F32 of 0 returns +inf.""" import math instructions = [ v_mov_b32_e32(v[0], 0), v_rsq_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isinf(i2f(st.vgpr[0][1]))) self.assertGreater(i2f(st.vgpr[0][1]), 0) def test_v_rsq_f32_neg_zero(self): """V_RSQ_F32 of -0.0 returns -inf.""" import math instructions = [ s_mov_b32(s[0], 0x80000000), # -0.0 v_mov_b32_e32(v[0], s[0]), v_rsq_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isinf(i2f(st.vgpr[0][1]))) self.assertLess(i2f(st.vgpr[0][1]), 0) def test_v_rsq_f32_inf(self): """V_RSQ_F32 of +inf returns 0.""" instructions = [ s_mov_b32(s[0], 0x7f800000), # +inf v_mov_b32_e32(v[0], s[0]), v_rsq_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(i2f(st.vgpr[0][1]), 0.0) def test_v_rsq_f32_negative(self): """V_RSQ_F32 of negative value returns NaN.""" import math instructions = [ v_mov_b32_e32(v[0], -1.0), v_rsq_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isnan(i2f(st.vgpr[0][1]))) def test_v_rsq_f32_large(self): """V_RSQ_F32 of large value.""" instructions = [ s_mov_b32(s[0], f2i(1e10)), v_mov_b32_e32(v[0], s[0]), v_rsq_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) # 1/sqrt(1e10) ~= 1e-5 self.assertAlmostEqual(result, 1e-5, places=8) class TestLog(unittest.TestCase): """Tests for V_LOG_F32 - base-2 logarithm.""" def test_v_log_f32_one(self): """V_LOG_F32 of 1.0 returns 0.0.""" instructions = [ v_mov_b32_e32(v[0], 1.0), v_log_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 0.0, places=4) def test_v_log_f32_two(self): """V_LOG_F32 of 2.0 returns 1.0.""" instructions = [ v_mov_b32_e32(v[0], 2.0), v_log_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 1.0, places=4) def test_v_log_f32_four(self): """V_LOG_F32 of 4.0 returns 2.0.""" instructions = [ v_mov_b32_e32(v[0], 4.0), v_log_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 2.0, places=4) def test_v_log_f32_half(self): """V_LOG_F32 of 0.5 returns -1.0.""" instructions = [ v_mov_b32_e32(v[0], 0.5), v_log_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), -1.0, places=4) def test_v_log_f32_zero(self): """V_LOG_F32 of 0 returns -inf.""" import math instructions = [ v_mov_b32_e32(v[0], 0), v_log_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isinf(i2f(st.vgpr[0][1]))) self.assertLess(i2f(st.vgpr[0][1]), 0) def test_v_log_f32_inf(self): """V_LOG_F32 of +inf returns +inf.""" import math instructions = [ s_mov_b32(s[0], 0x7f800000), # +inf v_mov_b32_e32(v[0], s[0]), v_log_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isinf(i2f(st.vgpr[0][1]))) self.assertGreater(i2f(st.vgpr[0][1]), 0) def test_v_log_f32_negative(self): """V_LOG_F32 of negative value returns NaN.""" import math instructions = [ v_mov_b32_e32(v[0], -1.0), v_log_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isnan(i2f(st.vgpr[0][1]))) class TestCos(unittest.TestCase): """Tests for V_COS_F32 - cosine (input in cycles, not radians).""" def test_v_cos_f32_zero(self): """V_COS_F32 at 0 cycles = cos(0) = 1.0.""" instructions = [ v_mov_b32_e32(v[0], 0), v_cos_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 1.0, places=4) def test_v_cos_f32_quarter(self): """V_COS_F32 at 0.25 cycles = cos(pi/2) = 0.0.""" instructions = [ s_mov_b32(s[0], f2i(0.25)), v_mov_b32_e32(v[0], s[0]), v_cos_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 0.0, places=4) def test_v_cos_f32_half(self): """V_COS_F32 at 0.5 cycles = cos(pi) = -1.0.""" instructions = [ s_mov_b32(s[0], f2i(0.5)), v_mov_b32_e32(v[0], s[0]), v_cos_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), -1.0, places=4) def test_v_cos_f32_full(self): """V_COS_F32 at 1.0 cycles = cos(2*pi) = 1.0.""" instructions = [ v_mov_b32_e32(v[0], 1.0), v_cos_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 1.0, places=4) def test_v_cos_f32_large(self): """V_COS_F32 for large input value.""" import math val = 132000.0 instructions = [ s_mov_b32(s[0], f2i(val)), v_mov_b32_e32(v[0], s[0]), v_cos_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) expected = math.cos(val * 2 * math.pi) self.assertAlmostEqual(result, expected, places=2) class TestFractEdgeCases(unittest.TestCase): """Additional edge case tests for V_FRACT_F32.""" def test_v_fract_f32_negative(self): """V_FRACT_F32 of -1.25 should return 0.75 (fract is always positive).""" instructions = [ s_mov_b32(s[0], f2i(-1.25)), v_mov_b32_e32(v[0], s[0]), v_fract_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertAlmostEqual(result, 0.75, places=5) def test_v_fract_f32_negative_small(self): """V_FRACT_F32 of -0.25 should return 0.75.""" instructions = [ s_mov_b32(s[0], f2i(-0.25)), v_mov_b32_e32(v[0], s[0]), v_fract_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertAlmostEqual(result, 0.75, places=5) def test_v_fract_f32_whole_number(self): """V_FRACT_F32 of 5.0 should return 0.0.""" instructions = [ v_mov_b32_e32(v[0], 5.0), v_fract_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertAlmostEqual(result, 0.0, places=5) def test_v_fract_f32_negative_whole(self): """V_FRACT_F32 of -5.0 should return 0.0.""" instructions = [ v_mov_b32_e32(v[0], -5.0), v_fract_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertAlmostEqual(result, 0.0, places=5) def test_v_fract_f32_zero(self): """V_FRACT_F32 of 0.0 returns 0.0.""" instructions = [ v_mov_b32_e32(v[0], 0), v_fract_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(i2f(st.vgpr[0][1]), 0.0) def test_v_fract_f32_inf(self): """V_FRACT_F32 of +inf returns NaN.""" import math instructions = [ s_mov_b32(s[0], 0x7f800000), # +inf v_mov_b32_e32(v[0], s[0]), v_fract_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isnan(i2f(st.vgpr[0][1]))) def test_v_fract_f32_nan(self): """V_FRACT_F32 of NaN returns NaN.""" import math instructions = [ s_mov_b32(s[0], 0x7fc00000), # quiet NaN v_mov_b32_e32(v[0], s[0]), v_fract_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isnan(i2f(st.vgpr[0][1]))) class TestF16EdgeCases(unittest.TestCase): """Additional F16 conversion edge cases.""" def test_v_cvt_f32_f16_inf(self): """V_CVT_F32_F16 converts f16 infinity to f32 infinity.""" import math instructions = [ s_mov_b32(s[0], 0x7c00), # f16 +inf v_mov_b32_e32(v[0], s[0]), v_cvt_f32_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isinf(i2f(st.vgpr[0][1]))) self.assertGreater(i2f(st.vgpr[0][1]), 0) def test_v_cvt_f32_f16_neg_inf(self): """V_CVT_F32_F16 converts f16 -inf to f32 -inf.""" import math instructions = [ s_mov_b32(s[0], 0xfc00), # f16 -inf v_mov_b32_e32(v[0], s[0]), v_cvt_f32_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isinf(i2f(st.vgpr[0][1]))) self.assertLess(i2f(st.vgpr[0][1]), 0) def test_v_cvt_f32_f16_nan(self): """V_CVT_F32_F16 converts f16 NaN to f32 NaN.""" import math instructions = [ s_mov_b32(s[0], 0x7e00), # f16 quiet NaN v_mov_b32_e32(v[0], s[0]), v_cvt_f32_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isnan(i2f(st.vgpr[0][1]))) def test_v_cvt_f32_f16_neg_zero(self): """V_CVT_F32_F16 preserves negative zero.""" instructions = [ s_mov_b32(s[0], 0x8000), # f16 -0.0 v_mov_b32_e32(v[0], s[0]), v_cvt_f32_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0x80000000) def test_v_cvt_f16_f32_overflow(self): """V_CVT_F16_F32 converts large f32 to f16 infinity.""" instructions = [ s_mov_b32(s[0], f2i(100000.0)), # too large for f16 v_mov_b32_e32(v[0], s[0]), v_cvt_f16_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) lo_bits = st.vgpr[0][1] & 0xffff self.assertEqual(lo_bits, 0x7c00) # f16 +inf def test_v_cvt_f16_f32_underflow(self): """V_CVT_F16_F32 converts very small f32 to f16 zero or denormal.""" instructions = [ s_mov_b32(s[0], f2i(1e-10)), # very small, below f16 range v_mov_b32_e32(v[0], s[0]), v_cvt_f16_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) lo_bits = st.vgpr[0][1] & 0xffff # Should be zero or very small denormal self.assertLess(lo_bits, 0x0400) # Less than smallest normal f16 class TestExpEdgeCases(unittest.TestCase): """Additional edge cases for V_EXP_F32.""" def test_v_exp_f32_zero(self): """V_EXP_F32 of 0.0 returns 1.0 (2^0 = 1).""" instructions = [ v_mov_b32_e32(v[0], 0), v_exp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 1.0, places=5) def test_v_exp_f32_one(self): """V_EXP_F32 of 1.0 returns 2.0 (2^1 = 2).""" instructions = [ v_mov_b32_e32(v[0], 1.0), v_exp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 2.0, places=5) def test_v_exp_f32_neg_one(self): """V_EXP_F32 of -1.0 returns 0.5 (2^-1 = 0.5).""" instructions = [ v_mov_b32_e32(v[0], -1.0), v_exp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), 0.5, places=5) def test_v_exp_f32_inf(self): """V_EXP_F32 of +inf returns +inf.""" import math instructions = [ s_mov_b32(s[0], 0x7f800000), # +inf v_mov_b32_e32(v[0], s[0]), v_exp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isinf(i2f(st.vgpr[0][1]))) self.assertGreater(i2f(st.vgpr[0][1]), 0) def test_v_exp_f32_neg_inf(self): """V_EXP_F32 of -inf returns 0.""" instructions = [ s_mov_b32(s[0], 0xff800000), # -inf v_mov_b32_e32(v[0], s[0]), v_exp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(i2f(st.vgpr[0][1]), 0.0) def test_v_exp_f32_nan(self): """V_EXP_F32 of NaN returns NaN.""" import math instructions = [ s_mov_b32(s[0], 0x7fc00000), # quiet NaN v_mov_b32_e32(v[0], s[0]), v_exp_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertTrue(math.isnan(i2f(st.vgpr[0][1]))) class TestFloorEdgeCases(unittest.TestCase): """Additional edge cases for V_FLOOR_F32.""" def test_v_floor_f32_negative(self): """V_FLOOR_F32 of -2.3 returns -3.0.""" instructions = [ s_mov_b32(s[0], f2i(-2.3)), v_mov_b32_e32(v[0], s[0]), v_floor_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), -3.0, places=5) def test_v_floor_f32_neg_zero(self): """V_FLOOR_F32 of -0.0 returns -0.0.""" instructions = [ s_mov_b32(s[0], 0x80000000), # -0.0 v_mov_b32_e32(v[0], s[0]), v_floor_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][1], 0x80000000) def test_v_floor_f32_small_positive(self): """V_FLOOR_F32 of 0.9 returns 0.0.""" instructions = [ s_mov_b32(s[0], f2i(0.9)), v_mov_b32_e32(v[0], s[0]), v_floor_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(i2f(st.vgpr[0][1]), 0.0) def test_v_floor_f32_small_negative(self): """V_FLOOR_F32 of -0.9 returns -1.0.""" instructions = [ s_mov_b32(s[0], f2i(-0.9)), v_mov_b32_e32(v[0], s[0]), v_floor_f32_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertAlmostEqual(i2f(st.vgpr[0][1]), -1.0, places=5) class TestVop1F16HiHalf(unittest.TestCase): """Regression tests for VOP1 f16 hi-half source operand handling. For 16-bit VOP1 operations, when src0 is in the range v[128]+ (offset >= 384), the hardware reads from the high 16 bits of v[src0-128]. The emulator must extract bits [31:16] from the actual VGPR. """ def test_v_cvt_f32_f16_src_hi_half(self): """V_CVT_F32_F16 with source from hi-half (v[128]+). When src0 >= v[128], it reads from the high 16 bits of v[src0-128]. This is critical for global_load_d16_hi_b16 + v_cvt_f32_f16 patterns. Regression test for: VOP1 f16 src0 hi-half extraction bug. """ instructions = [ # v[0] = 0x4000_3c00: hi=f16(2.0), lo=f16(1.0) s_mov_b32(s[0], 0x40003c00), v_mov_b32_e32(v[0], s[0]), # v_cvt_f32_f16 v[1], v[128] (reads hi half of v[0]) # Should convert f16(2.0) to f32(2.0) v_cvt_f32_f16_e32(v[1], v[128]), ] st = run_program(instructions, n_lanes=1) result = i2f(st.vgpr[0][1]) self.assertAlmostEqual(result, 2.0, places=5, msg=f"Expected f32(2.0), got {result}") def test_v_cvt_f32_f16_src_lo_vs_hi(self): """V_CVT_F32_F16 comparing lo and hi half reads. v[0] has different values in lo and hi halves. v_cvt_f32_f16 v[1], v[0] should read lo (1.0) v_cvt_f32_f16 v[2], v[128] should read hi (2.0) Regression test for: VOP1 f16 src0 hi-half extraction bug. """ instructions = [ # v[0] = 0x4000_3c00: hi=f16(2.0), lo=f16(1.0) s_mov_b32(s[0], 0x40003c00), v_mov_b32_e32(v[0], s[0]), # Read from lo half v_cvt_f32_f16_e32(v[1], v[0]), # Read from hi half v_cvt_f32_f16_e32(v[2], v[128]), ] st = run_program(instructions, n_lanes=1) result_lo = i2f(st.vgpr[0][1]) result_hi = i2f(st.vgpr[0][2]) self.assertAlmostEqual(result_lo, 1.0, places=5, msg=f"Expected f32(1.0) from lo, got {result_lo}") self.assertAlmostEqual(result_hi, 2.0, places=5, msg=f"Expected f32(2.0) from hi, got {result_hi}") def test_v_cvt_i16_f16_src_hi_half(self): """V_CVT_I16_F16 with source from hi-half. Regression test for: VOP1 f16 src0 hi-half extraction bug. """ instructions = [ # v[0] = 0xc000_3c00: hi=f16(-2.0), lo=f16(1.0) s_mov_b32(s[0], 0xc0003c00), v_mov_b32_e32(v[0], s[0]), # v_cvt_i16_f16 v[1], v[128] (reads hi half of v[0]) # Should convert f16(-2.0) to i16(-2) v_cvt_i16_f16_e32(v[1], v[128]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff expected = (-2) & 0xffff self.assertEqual(result, expected, f"Expected i16(-2)=0x{expected:04x}, got 0x{result:04x}") def test_v_mov_b16_src_hi_half(self): """V_MOV_B16 with source from hi-half. Regression test for: VOP1 f16 src0 hi-half extraction bug. """ instructions = [ # v[0] = 0xBEEF_DEAD: hi=0xBEEF, lo=0xDEAD s_mov_b32(s[0], 0xBEEFDEAD), v_mov_b32_e32(v[0], s[0]), # v[1] = 0x0000_0000 initially v_mov_b32_e32(v[1], 0), # v_mov_b16 v[1], v[128] (reads hi half of v[0]) # Should move 0xBEEF to v[1].lo v_mov_b16_e32(v[1], v[128]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertEqual(result, 0xBEEF, f"Expected 0xBEEF from hi half, got 0x{result:04x}") class TestReciprocalF16(unittest.TestCase): """Tests for V_RCP_F16 - reciprocal in half precision. The pcode uses a 16-bit float literal: D0.f16 = 16'1.0 / S0.f16 This tests that the sized float literal (16'1.0) is correctly parsed. """ def test_v_rcp_f16_one(self): """V_RCP_F16: 1/1.0 = 1.0""" import struct def f16_to_bits(f): return struct.unpack(' i16 max (32767).""" instructions = [ s_mov_b32(s[0], f32_to_f16(1.0)), v_mov_b32_e32(v[0], s[0]), v_cvt_norm_i16_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertEqual(result, 32767) def test_cvt_norm_i16_f16_negative(self): """V_CVT_NORM_I16_F16: f16 -1.0 -> i16 -32767 (0x8001).""" instructions = [ s_mov_b32(s[0], f32_to_f16(-1.0)), v_mov_b32_e32(v[0], s[0]), v_cvt_norm_i16_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertEqual(result, 0x8001) # -32767, hardware uses symmetric range def test_cvt_norm_i16_f16_zero(self): """V_CVT_NORM_I16_F16: f16 0.0 -> i16 0.""" instructions = [ v_mov_b32_e32(v[0], 0), v_cvt_norm_i16_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertEqual(result, 0) def test_cvt_norm_u16_f16_one(self): """V_CVT_NORM_U16_F16: f16 1.0 -> u16 max (65535).""" instructions = [ s_mov_b32(s[0], f32_to_f16(1.0)), v_mov_b32_e32(v[0], s[0]), v_cvt_norm_u16_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertEqual(result, 65535) def test_cvt_norm_u16_f16_half(self): """V_CVT_NORM_U16_F16: f16 0.5 -> u16 ~32768.""" instructions = [ s_mov_b32(s[0], f32_to_f16(0.5)), v_mov_b32_e32(v[0], s[0]), v_cvt_norm_u16_f16_e32(v[1], v[0]), ] st = run_program(instructions, n_lanes=1) result = st.vgpr[0][1] & 0xffff self.assertAlmostEqual(result, 32768, delta=1) class TestPermlane64(unittest.TestCase): """Tests for V_PERMLANE64_B32 instruction (wave64 cross-half swap).""" def test_v_permlane64_b32_is_nop_in_wave32(self): """V_PERMLANE64_B32 is a NOP in wave32 mode. Per AMD pcode: "if WAVE32 then s_nop(...) else ... endif" The emulator runs in wave32 mode, so this instruction should not modify registers. """ instructions = [ v_mov_b32_e32(v[0], 0xCAFEBABE), # source v_mov_b32_e32(v[1], 0x12345678), # dest (should be preserved) v_permlane64_b32_e32(v[1], v[0]), # NOP in wave32 ] st = run_program(instructions, n_lanes=1) # Dest register should be unchanged (NOP behavior in wave32) self.assertEqual(st.vgpr[0][1], 0x12345678) class TestSwap(unittest.TestCase): """Tests for V_SWAP_B32 - swap two VGPRs.""" def test_v_swap_b32_basic(self): """V_SWAP_B32 swaps two VGPR values.""" instructions = [ v_mov_b32_e32(v[0], 42), v_mov_b32_e32(v[1], 99), v_swap_b32_e32(v[0], v[1]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][0], 99) self.assertEqual(st.vgpr[0][1], 42) def test_v_swap_b32_same_reg(self): """V_SWAP_B32 with same src and dst is a no-op.""" instructions = [ v_mov_b32_e32(v[0], 0xDEADBEEF), v_swap_b32_e32(v[0], v[0]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][0], 0xDEADBEEF) def test_v_swap_b32_multi_lane(self): """V_SWAP_B32 swaps per-lane values independently.""" instructions = [ # v[0] = lane_id * 10, v[1] = lane_id * 100 v_lshlrev_b32_e32(v[0], 1, v[255]), # v[0] = lane_id * 2 v_add_nc_u32_e32(v[0], v[0], v[255]), # v[0] = lane_id * 3 v_mul_u32_u24_e32(v[1], 100, v[255]), # v[1] = lane_id * 100 v_swap_b32_e32(v[0], v[1]), ] st = run_program(instructions, n_lanes=4) for lane in range(4): self.assertEqual(st.vgpr[lane][0], lane * 100) self.assertEqual(st.vgpr[lane][1], lane * 3) def test_v_swap_b32_chain(self): """Two swaps in sequence restore original values.""" instructions = [ v_mov_b32_e32(v[0], 0xAAAAAAAA), v_mov_b32_e32(v[1], 0x55555555), v_swap_b32_e32(v[0], v[1]), v_swap_b32_e32(v[0], v[1]), ] st = run_program(instructions, n_lanes=1) self.assertEqual(st.vgpr[0][0], 0xAAAAAAAA) self.assertEqual(st.vgpr[0][1], 0x55555555) if __name__ == '__main__': unittest.main()