use official gguf in test (#14872)

also deleted test_load_sample_mxfp4 (all weights in that file are F32, so it never tested mxfp4 correctness) and added some simple hard-coded tests
2026-06-11 23:46:02 +08:00 · 2026-02-18 19:55:09 -05:00
parent 8c830c5b44
commit e8252e6e4f
3 changed files with 47 additions and 75 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,7 +1,7 @@
 name: Unit Tests
 env:
  # increment this when downloads substantially change to avoid the internet
-  CACHE_VERSION: '16'
+  CACHE_VERSION: '17'
  CAPTURE_PROCESS_REPLAY: 1
  GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  PYTHONPATH: ${{ github.workspace }}
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,7 +74,7 @@ testing_minimal = [
  "hypothesis>=6.148.9",
  "z3-solver<4.15.4",  # 4.15.4 has a segfault when creating many z3.Context()
 ]
-testing_unit = ["tinygrad[testing_minimal]", "tqdm", "safetensors", "tabulate", "openai", "ggml-python"]
+testing_unit = ["tinygrad[testing_minimal]", "tqdm", "safetensors", "tabulate", "openai", "gguf"]
 testing = [
  "tinygrad[testing_unit]",
  "pillow",
--- a/test/unit/test_gguf.py
+++ b/test/unit/test_gguf.py
@@ -1,62 +1,38 @@
-import os, unittest, ctypes
+import os, unittest
 from tinygrad import dtypes, Tensor, fetch, Device
 from tinygrad.nn.state import ggml_data_to_tensor, gguf_load
 from tinygrad.device import is_dtype_supported
 import numpy as np
-import ggml
+from gguf import GGUFReader, GGUFValueType, GGMLQuantizationType, GGML_QUANT_SIZES, dequantize, quantize

 ggml_test_block_count = 4
-ggml_type_to_np_dtype = {
-  ggml.GGML_TYPE_F16: np.float16, ggml.GGML_TYPE_F32:np.float32, ggml.GGML_TYPE_F64:np.float64,
-  ggml.GGML_TYPE_I8:np.int8, ggml.GGML_TYPE_I16: np.int16, ggml.GGML_TYPE_I32: np.int32, ggml.GGML_TYPE_I64: np.int64,
-}
-np_dtype_to_ctype = { np.float16: ctypes.c_uint16 }
-gguf_val_getters = [
-  ggml.gguf_get_val_u8, ggml.gguf_get_val_i8, ggml.gguf_get_val_u16, ggml.gguf_get_val_i16,
-  ggml.gguf_get_val_u32, ggml.gguf_get_val_i32, ggml.gguf_get_val_f32, ggml.gguf_get_val_bool,
-  lambda *args: ggml.gguf_get_val_str(*args).decode("utf-8"), None,
-  ggml.gguf_get_val_u64, ggml.gguf_get_val_i64, ggml.gguf_get_val_f64,
-]
-
-def ggml_tensor_to_numpy(tensor: ggml.ggml_tensor_p):
-  ctx: ggml.ggml_context_p | None = None
-  ggml_type, n_dims, n_els = tensor.contents.type, ggml.ggml_n_dims(tensor), ggml.ggml_nelements(tensor)
-  shape = tuple(reversed(tensor.contents.ne[:n_dims]))
-  if ggml_type not in ggml_type_to_np_dtype:
-    ctx = ggml.ggml_init(ggml.ggml_init_params(mem_size=n_els * 5 + 500, mem_buffer=None))
-    ntensor = ggml.ggml_new_tensor(ctx, ggml.GGML_TYPE_F32, n_dims, tensor.contents.ne)
-    type_traits = ggml.ggml_internal_get_type_traits(ggml_type)
-    type_traits.to_float(ggml.ggml_get_data(tensor), ggml.ggml_get_data_f32(ntensor), n_els)
-    tensor, ggml_type = ntensor, ggml.GGML_TYPE_F32
-
-  np_type = ggml_type_to_np_dtype[ggml_type]
-  ctypes_type = np_dtype_to_ctype.get(np_type, None) or np.ctypeslib.as_ctypes_type(np_type)
-  data = ggml.ggml_get_data(tensor)
-  if data is None: raise ValueError("tensor data is None")
-  arr = (ctypes_type * ggml.ggml_nelements(tensor)).from_address(data)
-  strides = tuple(reversed(tensor.contents.nb[:n_dims]))
-  output = np.ctypeslib.as_array(arr)
-  output.dtype = np_type
-  return np.lib.stride_tricks.as_strided(output, shape=shape, strides=strides), ctx

@unittest.skipIf(any(not is_dtype_supported(t) for t in [ dtypes.uint8, dtypes.half ]), "Backend must support uint8 and half")
 class TestGGUF(unittest.TestCase):
-  def setUp(self) -> None:
-    params = ggml.ggml_init_params(mem_size=0, mem_buffer=None, no_alloc=False)
-    self.ctx = ctypes.cast(ggml.ggml_init(params), ctypes.POINTER(ctypes.c_void_p))
-  def tearDown(self) -> None: ggml.ggml_free(self.ctx)
-
  def test_load_tinyllama_q8_0(self): self._test_gguf_load("https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories15M-q8_0.gguf?download=true")
  def test_load_tinyllama_q4_0(self): self._test_gguf_load("https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories15M-q4_0.gguf?download=true")
  def test_load_gpt2_q4_1(self): self._test_gguf_load("https://huggingface.co/PrunaAI/gpt2-GGUF-smashed/resolve/main/gpt2.Q4_1.gguf?download=true")
  def test_load_sample_q6_k(self): self._test_gguf_load("https://huggingface.co/Isotr0py/test-gguf-sample/resolve/main/Quant_Q6_K_1024.gguf?download=true")
-  def test_load_sample_mxfp4(self): self._test_gguf_load("https://huggingface.co/ngxson/boring-testing-tiny/resolve/main/stories260K-mxfp4.gguf?download=true")
-  # NOTE: The test above does not actually test mxfp4 correctness because all the weights in that file are F32
-  def test_dequantization_q4_0(self): self._test_dequantization(ggml.GGML_TYPE_Q4_0)
-  def test_dequantization_q4_1(self): self._test_dequantization(ggml.GGML_TYPE_Q4_1)
-  def test_dequantization_q8_0(self): self._test_dequantization(ggml.GGML_TYPE_Q8_0)
-  def test_dequantization_q4_k(self): self._test_dequantization(ggml.GGML_TYPE_Q4_K)
-  def test_dequantization_q6_k(self): self._test_dequantization(ggml.GGML_TYPE_Q6_K)
+
+  def test_dequantization_q8_0_hardcoded(self):
+    # Q8_0: 2 bytes float16 scale + 32 bytes int8 values, dequant = scale * values
+    block = np.frombuffer(np.float16(2.0).tobytes() + np.arange(1, 33, dtype=np.int8).tobytes(), dtype=np.uint8).copy()
+    expected = np.arange(1, 33, dtype=np.float32) * 2.0
+    np.testing.assert_equal(ggml_data_to_tensor(Tensor(block), 32, GGMLQuantizationType.Q8_0.value).numpy().flatten(), expected)
+
+  def test_dequantization_mxfp4_hardcoded(self):
+    # MXFP4: 1 byte shared exponent E + 16 packed bytes (32 x 4-bit values)
+    # nibble: bit3=sign, bit2:1=exp, bit0=mant; E=128 gives scale=1.0
+    # codes 0-7 = [0, 1, 2, 3, 4, 6, 8, 12], codes 8-15 are their negatives
+    block = np.array([0x80] + list(range(16)), dtype=np.uint8)  # E=128, nibbles 0-15 in low, zeros in high
+    expected = np.array([0., 1., 2., 3., 4., 6., 8., 12., -0., -1., -2., -3., -4., -6., -8., -12.] + [0.]*16, dtype=np.float32)
+    np.testing.assert_equal(ggml_data_to_tensor(Tensor(block), 32, 39).numpy().flatten(), expected)
+
+  def test_dequantization_q4_0(self): self._test_dequantization(GGMLQuantizationType.Q4_0)
+  def test_dequantization_q4_1(self): self._test_dequantization(GGMLQuantizationType.Q4_1)
+  def test_dequantization_q8_0(self): self._test_dequantization(GGMLQuantizationType.Q8_0)
+  def test_dequantization_q4_k(self): self._test_dequantization(GGMLQuantizationType.Q4_K)
+  def test_dequantization_q6_k(self): self._test_dequantization(GGMLQuantizationType.Q6_K)
  def test_dequantization_mxfp4(self):
    MXFP4 = 39

@@ -108,20 +84,20 @@ class TestGGUF(unittest.TestCase):
    with self.assertRaises(ValueError):
      ggml_data_to_tensor(Tensor.empty(512, dtype=dtypes.uint8), 256, 1337)

-  def _test_dequantization(self, ttype: int):
-    type_traits = ggml.ggml_internal_get_type_traits(ttype)
-    n_el, n_bytes = ggml_test_block_count * type_traits.blck_size, ggml_test_block_count * type_traits.type_size
+  def _test_dequantization(self, qtype: GGMLQuantizationType):
+    block_size, type_size = GGML_QUANT_SIZES[qtype]
+    n_el, n_bytes = ggml_test_block_count * block_size, ggml_test_block_count * type_size

-    data_in = (np.random.random((n_el,)).astype(np.float32) * 100 - 50).ctypes.data_as(ctypes.POINTER(ctypes.c_float))
+    try:
+      q_data = quantize((np.random.random((n_el,)).astype(np.float32) * 100 - 50), qtype)
+    except NotImplementedError:
+      q_data = np.random.default_rng(42).integers(0, 256, size=n_bytes, dtype=np.uint8)
+    ref = dequantize(q_data, qtype)

-    c_q_data, c_dq_data = (ctypes.c_char * n_bytes)(0), (ctypes.c_float * n_el)(0)
-    type_traits.from_float(data_in, c_q_data, n_el)
-    type_traits.to_float(c_q_data, c_dq_data, n_el)
+    q_tensor = Tensor(q_data)
+    dq_tensor = ggml_data_to_tensor(q_tensor, n_el, qtype.value).reshape(n_el)

-    q_tensor = Tensor(np.frombuffer(c_q_data, dtype=np.uint8, count=n_bytes))
-    dq_tensor = ggml_data_to_tensor(q_tensor, n_el, ttype).reshape(n_el)
-
-    np.testing.assert_equal(dq_tensor.numpy(), np.frombuffer(c_dq_data, dtype=np.float32))
+    np.testing.assert_equal(dq_tensor.numpy(), ref)

  def _test_gguf_load(self, url: str):
    fp = fetch(url)
@@ -129,24 +105,20 @@ class TestGGUF(unittest.TestCase):
    gguf_tensor = Tensor.empty(model_size, dtype=dtypes.uint8, device=f"disk:{fp}").to(Device.DEFAULT)
    kv_data, tensors = gguf_load(gguf_tensor)

-    gguf_params = ggml.gguf_init_params(ctx=self.ctx, no_alloc=False)
-    gguf_ctx = ggml.gguf_init_from_file(str(fp).encode("utf8"), gguf_params)
-    param_ctx = gguf_params.ctx.contents.value
+    reader = GGUFReader(fp)

-    for ggml_tensor_idx in range(ggml.gguf_get_n_tensors(gguf_ctx)):
-      tensor_name = ggml.gguf_get_tensor_name(gguf_ctx, ggml_tensor_idx)
-      ggml_tensor = ggml.ggml_get_tensor(param_ctx, tensor_name)
-      ggml_tensor_numpy, temp_ctx = ggml_tensor_to_numpy(ggml_tensor)
-      tensor = tensors.get(tensor_name.decode("utf-8"))
-      np.testing.assert_equal(tensor.numpy(), ggml_tensor_numpy)
-      if temp_ctx is not None: ggml.ggml_free(temp_ctx)
+    for rt in reader.tensors:
+      ref = dequantize(rt.data, rt.tensor_type)
+      np.testing.assert_equal(tensors[rt.name].numpy(), ref.reshape(tensors[rt.name].shape))

-    for gguf_key_id in range(ggml.gguf_get_n_kv(gguf_ctx)):
-      v = kv_data[ggml.gguf_get_key(gguf_ctx, gguf_key_id).decode("utf-8")]
-      v_type = ggml.gguf_get_kv_type(gguf_ctx, gguf_key_id)
-      if (get_fn := gguf_val_getters[v_type]) is not None: self.assertEqual(get_fn(gguf_ctx, gguf_key_id), v)
-
-    ggml.gguf_free(gguf_ctx)
+    for k, f in reader.fields.items():
+      if k.startswith("GGUF."): continue  # skip file header keys (version, tensor_count, kv_count)
+      def read_val(i, parts=f.parts, is_str=(f.types[-1] == GGUFValueType.STRING)):
+        return bytes(parts[i]).decode("utf-8") if is_str else parts[i][0].item()
+      if f.types[0] == GGUFValueType.ARRAY:
+        self.assertEqual(kv_data[k], [read_val(i) for i in f.data])
+      else:
+        self.assertEqual(kv_data[k], read_val(-1))

 if __name__ == '__main__':
  unittest.main()